Diffstat (limited to 'src/video_core')
51 files changed, 1164 insertions, 479 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 099bb446e..21c46a567 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -3,6 +3,8 @@ add_library(video_core STATIC
     buffer_cache/buffer_cache.h
     buffer_cache/map_interval.cpp
     buffer_cache/map_interval.h
+    compatible_formats.cpp
+    compatible_formats.h
     dirty_flags.cpp
     dirty_flags.h
     dma_pusher.cpp
@@ -27,6 +29,8 @@ add_library(video_core STATIC
     engines/shader_type.h
     macro/macro.cpp
     macro/macro.h
+    macro/macro_hle.cpp
+    macro/macro_hle.h
     macro/macro_interpreter.cpp
     macro/macro_interpreter.h
     macro/macro_jit_x64.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 308d8b55f..c6479af9f 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -41,16 +41,20 @@ class BufferCache {
     static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;

 public:
-    using BufferInfo = std::pair<BufferType, u64>;
+    struct BufferInfo {
+        BufferType handle;
+        u64 offset;
+        u64 address;
+    };

     BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
                             bool is_written = false, bool use_fast_cbuf = false) {
         std::lock_guard lock{mutex};

-        const auto& memory_manager = system.GPU().MemoryManager();
+        auto& memory_manager = system.GPU().MemoryManager();
         const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
         if (!cpu_addr_opt) {
-            return {GetEmptyBuffer(size), 0};
+            return GetEmptyBuffer(size);
         }
         const VAddr cpu_addr = *cpu_addr_opt;

@@ -59,7 +63,6 @@ public:
         constexpr std::size_t max_stream_size = 0x800;
         if (use_fast_cbuf || size < max_stream_size) {
             if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
-                auto& memory_manager = system.GPU().MemoryManager();
                 const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size);
                 if (use_fast_cbuf) {
                     u8* dest;
@@ -89,7 +92,7 @@ public:
         Buffer* const block = GetBlock(cpu_addr, size);
         MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
         if (!map) {
-            return {GetEmptyBuffer(size), 0};
+            return GetEmptyBuffer(size);
         }
         if (is_written) {
             map->MarkAsModified(true, GetModifiedTicks());
@@ -102,7 +105,7 @@ public:
             }
         }

-        return {block->Handle(), static_cast<u64>(block->Offset(cpu_addr))};
+        return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()};
     }

     /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
@@ -255,27 +258,17 @@ public:
         committed_flushes.pop_front();
     }

-    virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
+    virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;

 protected:
     explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
-                         std::unique_ptr<StreamBuffer> stream_buffer_)
-        : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer_)},
-          stream_buffer_handle{stream_buffer->Handle()} {}
+                         std::unique_ptr<StreamBuffer> stream_buffer)
+        : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {}

     ~BufferCache() = default;

     virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;

-    virtual void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
-                                 const u8* data) = 0;
-
-    virtual void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
-                                   u8* data) = 0;
-
-    virtual void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
-                           std::size_t dst_offset, std::size_t size) = 0;
-
     virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
         return {};
     }
@@ -329,19 +322,18 @@ protected:
     }

 private:
-    MapInterval* MapAddress(const Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr,
-                            std::size_t size) {
+    MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
         const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
         if (overlaps.empty()) {
             auto& memory_manager = system.GPU().MemoryManager();
             const VAddr cpu_addr_end = cpu_addr + size;
             if (memory_manager.IsGranularRange(gpu_addr, size)) {
                 u8* host_ptr = memory_manager.GetPointer(gpu_addr);
-                UploadBlockData(*block, block->Offset(cpu_addr), size, host_ptr);
+                block->Upload(block->Offset(cpu_addr), size, host_ptr);
             } else {
                 staging_buffer.resize(size);
                 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
-                UploadBlockData(*block, block->Offset(cpu_addr), size, staging_buffer.data());
+                block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
             }
             return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
         }
@@ -384,8 +376,7 @@ private:
         return map;
     }

-    void UpdateBlock(const Buffer* block, VAddr start, VAddr end,
-                     const VectorMapInterval& overlaps) {
+    void UpdateBlock(Buffer* block, VAddr start, VAddr end, const VectorMapInterval& overlaps) {
         const IntervalType base_interval{start, end};
         IntervalSet interval_set{};
         interval_set.add(base_interval);
@@ -400,7 +391,7 @@ private:
             }
             staging_buffer.resize(size);
             system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
-            UploadBlockData(*block, block->Offset(interval.lower()), size, staging_buffer.data());
+            block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
         }
     }

@@ -437,7 +428,7 @@ private:

         const std::size_t size = map->end - map->start;
         staging_buffer.resize(size);
-        DownloadBlockData(*block, block->Offset(map->start), size, staging_buffer.data());
+        block->Download(block->Offset(map->start), size, staging_buffer.data());
         system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
         map->MarkAsModified(false, 0);
     }
@@ -450,7 +441,7 @@ private:

         buffer_ptr += size;
         buffer_offset += size;
-        return {stream_buffer_handle, uploaded_offset};
+        return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
     }

     void AlignBuffer(std::size_t alignment) {
@@ -465,7 +456,7 @@ private:
         const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
         const VAddr cpu_addr = buffer->CpuAddr();
         std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
-        CopyBlock(*buffer, *new_buffer, 0, 0, old_size);
+        new_buffer->CopyFrom(*buffer, 0, 0, old_size);
         QueueDestruction(std::move(buffer));

         const VAddr cpu_addr_end = cpu_addr + new_size - 1;
@@ -487,8 +478,8 @@ private:
         const std::size_t new_size = size_1 + size_2;

         std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
-        CopyBlock(*first, *new_buffer, 0, new_buffer->Offset(first_addr), size_1);
-        CopyBlock(*second, *new_buffer, 0, new_buffer->Offset(second_addr), size_2);
+        new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1);
+        new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);
         QueueDestruction(std::move(first));
         QueueDestruction(std::move(second));

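The buffer_cache.h changes above replace the old std::pair<BufferType, u64> with a named BufferInfo struct that also carries the block's GPU virtual address, so backends with NV_vertex_buffer_unified_memory can bind by address instead of by handle. A minimal sketch of what a consumer of the new struct might look like; the types and the caller below are stand-ins, not the real yuzu classes:

    #include <cstdint>

    using BufferType = unsigned int; // e.g. a GLuint in the OpenGL backend

    struct BufferInfo {
        BufferType handle;     // API object to bind
        std::uint64_t offset;  // byte offset of the mapped range inside the block
        std::uint64_t address; // GPU virtual address; 0 when unified memory is absent
    };

    void BindVertexBuffer(const BufferInfo& info) {
        if (info.address != 0) {
            // Unified-memory path: bind info.address + info.offset directly
            // (e.g. via glBufferAddressRangeNV in the OpenGL backend).
        } else {
            // Classic path: bind info.handle at info.offset.
        }
    }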
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp
new file mode 100644
index 000000000..6c426b035
--- /dev/null
+++ b/src/video_core/compatible_formats.cpp
@@ -0,0 +1,162 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <bitset>
+#include <cstddef>
+
+#include "video_core/compatible_formats.h"
+#include "video_core/surface.h"
+
+namespace VideoCore::Surface {
+
+namespace {
+
+// Compatibility table taken from Table 3.X.2 in:
+// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt
+
+constexpr std::array VIEW_CLASS_128_BITS = {
+    PixelFormat::RGBA32F,
+    PixelFormat::RGBA32UI,
+};
+// Missing formats:
+// PixelFormat::RGBA32I
+
+constexpr std::array VIEW_CLASS_96_BITS = {
+    PixelFormat::RGB32F,
+};
+// Missing formats:
+// PixelFormat::RGB32UI,
+// PixelFormat::RGB32I,
+
+constexpr std::array VIEW_CLASS_64_BITS = {
+    PixelFormat::RGBA16F, PixelFormat::RG32F,   PixelFormat::RGBA16UI, PixelFormat::RG32UI,
+    PixelFormat::RGBA16U, PixelFormat::RGBA16F, PixelFormat::RGBA16S,
+};
+// Missing formats:
+// PixelFormat::RGBA16I
+// PixelFormat::RG32I
+
+// TODO: How should we handle 48 bits?
+
+constexpr std::array VIEW_CLASS_32_BITS = {
+    PixelFormat::RG16F,        PixelFormat::R11FG11FB10F, PixelFormat::R32F,
+    PixelFormat::A2B10G10R10U, PixelFormat::RG16UI,       PixelFormat::R32UI,
+    PixelFormat::RG16I,        PixelFormat::R32I,         PixelFormat::ABGR8U,
+    PixelFormat::RG16,         PixelFormat::ABGR8S,       PixelFormat::RG16S,
+    PixelFormat::RGBA8_SRGB,   PixelFormat::E5B9G9R9F,    PixelFormat::BGRA8,
+    PixelFormat::BGRA8_SRGB,
+};
+// Missing formats:
+// PixelFormat::RGBA8UI
+// PixelFormat::RGBA8I
+// PixelFormat::RGB10_A2_UI
+
+// TODO: How should we handle 24 bits?
+
+constexpr std::array VIEW_CLASS_16_BITS = {
+    PixelFormat::R16F, PixelFormat::RG8UI, PixelFormat::R16UI, PixelFormat::R16I,
+    PixelFormat::RG8U, PixelFormat::R16U,  PixelFormat::RG8S,  PixelFormat::R16S,
+};
+// Missing formats:
+// PixelFormat::RG8I
+
+constexpr std::array VIEW_CLASS_8_BITS = {
+    PixelFormat::R8UI,
+    PixelFormat::R8U,
+};
+// Missing formats:
+// PixelFormat::R8I
+// PixelFormat::R8S
+
+constexpr std::array VIEW_CLASS_RGTC1_RED = {
+    PixelFormat::DXN1,
+};
+// Missing formats:
+// COMPRESSED_SIGNED_RED_RGTC1
+
+constexpr std::array VIEW_CLASS_RGTC2_RG = {
+    PixelFormat::DXN2UNORM,
+    PixelFormat::DXN2SNORM,
+};
+
+constexpr std::array VIEW_CLASS_BPTC_UNORM = {
+    PixelFormat::BC7U,
+    PixelFormat::BC7U_SRGB,
+};
+
+constexpr std::array VIEW_CLASS_BPTC_FLOAT = {
+    PixelFormat::BC6H_SF16,
+    PixelFormat::BC6H_UF16,
+};
+
+// Compatibility table taken from Table 4.X.1 in:
+// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt
+
+constexpr std::array COPY_CLASS_128_BITS = {
+    PixelFormat::RGBA32UI,   PixelFormat::RGBA32F, PixelFormat::DXT23,
+    PixelFormat::DXT23_SRGB, PixelFormat::DXT45,   PixelFormat::DXT45_SRGB,
+    PixelFormat::DXN2SNORM,  PixelFormat::BC7U,    PixelFormat::BC7U_SRGB,
+    PixelFormat::BC6H_SF16,  PixelFormat::BC6H_UF16,
+};
+// Missing formats:
+// PixelFormat::RGBA32I
+// COMPRESSED_RG_RGTC2
+
+constexpr std::array COPY_CLASS_64_BITS = {
+    PixelFormat::RGBA16F, PixelFormat::RG32F,   PixelFormat::RGBA16UI,  PixelFormat::RG32UI,
+    PixelFormat::RGBA16U, PixelFormat::RGBA16S, PixelFormat::DXT1_SRGB, PixelFormat::DXT1,
+
+};
+// Missing formats:
+// PixelFormat::RGBA16I
+// PixelFormat::RG32I,
+// COMPRESSED_RGB_S3TC_DXT1_EXT
+// COMPRESSED_SRGB_S3TC_DXT1_EXT
+// COMPRESSED_RGBA_S3TC_DXT1_EXT
+// COMPRESSED_SIGNED_RED_RGTC1
+
+void Enable(FormatCompatibility::Table& compatiblity, size_t format_a, size_t format_b) {
+    compatiblity[format_a][format_b] = true;
+    compatiblity[format_b][format_a] = true;
+}
+
+void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) {
+    Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
+}
+
+template <typename Range>
+void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) {
+    for (auto it_a = range.begin(); it_a != range.end(); ++it_a) {
+        for (auto it_b = it_a; it_b != range.end(); ++it_b) {
+            Enable(compatibility, *it_a, *it_b);
+        }
+    }
+}
+
+} // Anonymous namespace
+
+FormatCompatibility::FormatCompatibility() {
+    for (size_t i = 0; i < MaxPixelFormat; ++i) {
+        // Identity is allowed
+        Enable(view, i, i);
+    }
+
+    EnableRange(view, VIEW_CLASS_128_BITS);
+    EnableRange(view, VIEW_CLASS_96_BITS);
+    EnableRange(view, VIEW_CLASS_64_BITS);
+    EnableRange(view, VIEW_CLASS_32_BITS);
+    EnableRange(view, VIEW_CLASS_16_BITS);
+    EnableRange(view, VIEW_CLASS_8_BITS);
+    EnableRange(view, VIEW_CLASS_RGTC1_RED);
+    EnableRange(view, VIEW_CLASS_RGTC2_RG);
+    EnableRange(view, VIEW_CLASS_BPTC_UNORM);
+    EnableRange(view, VIEW_CLASS_BPTC_FLOAT);
+
+    copy = view;
+    EnableRange(copy, COPY_CLASS_128_BITS);
+    EnableRange(copy, COPY_CLASS_64_BITS);
+}
+
+} // namespace VideoCore::Surface
diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h
new file mode 100644
index 000000000..d1082566d
--- /dev/null
+++ b/src/video_core/compatible_formats.h
@@ -0,0 +1,32 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <bitset>
+#include <cstddef>
+
+#include "video_core/surface.h"
+
+namespace VideoCore::Surface {
+
+class FormatCompatibility {
+public:
+    using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>;
+
+    explicit FormatCompatibility();
+
+    bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept {
+        return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
+    }
+
+    bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept {
+        return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
+    }
+
+private:
+    Table view;
+    Table copy;
+};
+
+} // namespace VideoCore::Surface
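The new class precomputes two symmetric relations: view (texture views per ARB_texture_view) and copy, which is seeded from view and then widened with the ARB_copy_image classes, so every view-compatible pair is also copy-compatible. A sketch of the intended query pattern; the helper and its caller are hypothetical, only FormatCompatibility itself is from the commit:

    #include "video_core/compatible_formats.h"

    using VideoCore::Surface::FormatCompatibility;
    using VideoCore::Surface::PixelFormat;

    // Hypothetical helper: decide whether an existing surface can be
    // reinterpreted as `requested` without a format-conversion pass.
    bool CanAliasSurface(const FormatCompatibility& compatibility,
                         PixelFormat existing, PixelFormat requested) {
        return compatibility.TestView(existing, requested);
    }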
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index ea3c8a963..c01436295 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -128,7 +128,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
         ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());

     // Execute the current macro.
-    macro_engine->Execute(macro_positions[entry], parameters);
+    macro_engine->Execute(*this, macro_positions[entry], parameters);
     if (mme_draw.current_mode != MMEDrawMode::Undefined) {
         FlushMMEInlineDraw();
     }
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index d5fe25065..ef1618990 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1418,6 +1418,14 @@ public:
         return execute_on;
     }

+    VideoCore::RasterizerInterface& GetRasterizer() {
+        return rasterizer;
+    }
+
+    const VideoCore::RasterizerInterface& GetRasterizer() const {
+        return rasterizer;
+    }
+
     /// Notify a memory write has happened.
     void OnMemoryWrite() {
         dirty.flags |= dirty.on_write_stores;
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index e7cb87589..d374b73cf 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -661,6 +661,10 @@ union Instruction
     constexpr Instruction(u64 value) : value{value} {}
     constexpr Instruction(const Instruction& instr) : value(instr.value) {}

+    constexpr bool Bit(u64 offset) const {
+        return ((value >> offset) & 1) != 0;
+    }
+
     BitField<0, 8, Register> gpr0;
     BitField<8, 8, Register> gpr8;
     union {
@@ -1874,7 +1878,9 @@
         HSETP2_C,
         HSETP2_R,
         HSETP2_IMM,
+        HSET2_C,
         HSET2_R,
+        HSET2_IMM,
         POPC_C,
         POPC_R,
         POPC_IMM,
@@ -2194,7 +2200,9 @@
             INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
             INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
             INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
+            INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"),
             INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
+            INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
             INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
             INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
             INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
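The new Instruction::Bit helper, added alongside the HSET2_C/HSET2_IMM decodings above, extracts a single opcode bit without declaring a dedicated BitField. A standalone equivalent; the tested bit positions are illustrative only:

    #include <cstdint>

    struct Instruction {
        std::uint64_t value;

        constexpr bool Bit(std::uint64_t offset) const {
            return ((value >> offset) & 1) != 0;
        }
    };

    static_assert(Instruction{0b100}.Bit(2));  // bit 2 is set
    static_assert(!Instruction{0b100}.Bit(1)); // bit 1 is clear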
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 8eb017f65..482e49711 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <chrono>
+
 #include "common/assert.h"
 #include "common/microprofile.h"
 #include "core/core.h"
@@ -154,8 +156,7 @@ u64 GPU::GetTicks() const {
     constexpr u64 gpu_ticks_num = 384;
     constexpr u64 gpu_ticks_den = 625;

-    const u64 cpu_ticks = system.CoreTiming().GetTicks();
-    u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
+    u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
     if (Settings::values.use_fast_gpu_time) {
         nanoseconds /= 256;
     }
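GetTicks now samples the core timing's global nanosecond clock directly instead of converting CPU cycles first; the 384/625 ratio turns nanoseconds into ticks of a 614.4 MHz GPU counter (1'000'000'000 * 384 / 625 = 614'400'000 ticks per second). A standalone restatement of the arithmetic:

    #include <cstdint>

    constexpr std::uint64_t GpuTicks(std::uint64_t nanoseconds, bool fast_gpu_time) {
        constexpr std::uint64_t gpu_ticks_num = 384;
        constexpr std::uint64_t gpu_ticks_den = 625;
        if (fast_gpu_time) {
            nanoseconds /= 256; // the "fast GPU time" hack under-reports elapsed time
        }
        return nanoseconds * gpu_ticks_num / gpu_ticks_den;
    }

    static_assert(GpuTicks(1'000'000'000, false) == 614'400'000);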
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index a1b4c305c..2c42483bd 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -284,6 +284,12 @@ public:
     /// core timing events.
     virtual void Start() = 0;

+    /// Obtain the CPU Context
+    virtual void ObtainContext() = 0;
+
+    /// Release the CPU Context
+    virtual void ReleaseContext() = 0;
+
     /// Push GPU command entries to be processed
     virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;

diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 53305ab43..7b855f63e 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -19,10 +19,17 @@ GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBa
 GPUAsynch::~GPUAsynch() = default;

 void GPUAsynch::Start() {
-    cpu_context->MakeCurrent();
     gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher);
 }

+void GPUAsynch::ObtainContext() {
+    cpu_context->MakeCurrent();
+}
+
+void GPUAsynch::ReleaseContext() {
+    cpu_context->DoneCurrent();
+}
+
 void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
     gpu_thread.SubmitList(std::move(entries));
 }
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 517658612..15e9f1d38 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -25,6 +25,8 @@ public:
     ~GPUAsynch() override;

     void Start() override;
+    void ObtainContext() override;
+    void ReleaseContext() override;
     void PushGPUEntries(Tegra::CommandList&& entries) override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
     void FlushRegion(VAddr addr, u64 size) override;
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index 6f38a672a..aaeb9811d 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -13,10 +13,16 @@ GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase

 GPUSynch::~GPUSynch() = default;

-void GPUSynch::Start() {
+void GPUSynch::Start() {}
+
+void GPUSynch::ObtainContext() {
     context->MakeCurrent();
 }

+void GPUSynch::ReleaseContext() {
+    context->DoneCurrent();
+}
+
 void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
     dma_pusher->Push(std::move(entries));
     dma_pusher->DispatchCalls();
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 4a6e9a01d..762c20aa5 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -24,6 +24,8 @@ public:
     ~GPUSynch() override;

     void Start() override;
+    void ObtainContext() override;
+    void ReleaseContext() override;
     void PushGPUEntries(Tegra::CommandList&& entries) override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
     void FlushRegion(VAddr addr, u64 size) override;
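Across gpu.h, gpu_asynch, and gpu_synch, context ownership is split out of Start(): the frontend now decides when a thread takes or gives up the shared graphics context. A minimal standalone sketch of the resulting call order, with stand-in types rather than the real yuzu classes:

    struct GraphicsContext {
        void MakeCurrent() {}
        void DoneCurrent() {}
    };

    struct Gpu {
        void Start() {}                                  // thread setup only now
        void ObtainContext() { context.MakeCurrent(); }  // GPUSynch: context->MakeCurrent()
        void ReleaseContext() { context.DoneCurrent(); } // GPUSynch: context->DoneCurrent()
        GraphicsContext context;
    };

    int main() {
        Gpu gpu;
        gpu.Start();          // GPUAsynch spawns its GPU thread here instead
        gpu.ObtainContext();  // the emulation thread takes the CPU-side context
        // ... run guest code, push command lists ...
        gpu.ReleaseContext(); // give the context up before the thread exits
    }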
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index c3bb4fe06..738c6f0c1 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -4,6 +4,7 @@

 #include "common/assert.h"
 #include "common/microprofile.h"
+#include "common/thread.h"
 #include "core/core.h"
 #include "core/frontend/emu_window.h"
 #include "core/settings.h"
@@ -18,7 +19,11 @@ namespace VideoCommon::GPUThread {
 static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
                       Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
                       SynchState& state) {
-    MicroProfileOnThreadCreate("GpuThread");
+    std::string name = "yuzu:GPU";
+    MicroProfileOnThreadCreate(name.c_str());
+    Common::SetCurrentThreadName(name.c_str());
+    Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
+    system.RegisterHostThread();

     // Wait for first GPU command before acquiring the window context
     while (state.queue.Empty())
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index 89077a2d8..a50e7b4e0 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -2,32 +2,78 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <optional>
+#include <boost/container_hash/hash.hpp>
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/settings.h"
+#include "video_core/engines/maxwell_3d.h"
 #include "video_core/macro/macro.h"
+#include "video_core/macro/macro_hle.h"
 #include "video_core/macro/macro_interpreter.h"
 #include "video_core/macro/macro_jit_x64.h"

 namespace Tegra {

+MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d)
+    : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {}
+
+MacroEngine::~MacroEngine() = default;
+
 void MacroEngine::AddCode(u32 method, u32 data) {
     uploaded_macro_code[method].push_back(data);
 }

-void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
+void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
+                          const std::vector<u32>& parameters) {
     auto compiled_macro = macro_cache.find(method);
     if (compiled_macro != macro_cache.end()) {
-        compiled_macro->second->Execute(parameters, method);
+        const auto& cache_info = compiled_macro->second;
+        if (cache_info.has_hle_program) {
+            cache_info.hle_program->Execute(parameters, method);
+        } else {
+            cache_info.lle_program->Execute(parameters, method);
+        }
     } else {
         // Macro not compiled, check if it's uploaded and if so, compile it
-        auto macro_code = uploaded_macro_code.find(method);
+        std::optional<u32> mid_method = std::nullopt;
+        const auto macro_code = uploaded_macro_code.find(method);
         if (macro_code == uploaded_macro_code.end()) {
-            UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method);
-            return;
+            for (const auto& [method_base, code] : uploaded_macro_code) {
+                if (method >= method_base && (method - method_base) < code.size()) {
+                    mid_method = method_base;
+                    break;
+                }
+            }
+            if (!mid_method.has_value()) {
+                UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method);
+                return;
+            }
         }
-        macro_cache[method] = Compile(macro_code->second);
-        macro_cache[method]->Execute(parameters, method);
+        auto& cache_info = macro_cache[method];
+
+        if (!mid_method.has_value()) {
+            cache_info.lle_program = Compile(macro_code->second);
+            cache_info.hash = boost::hash_value(macro_code->second);
+        } else {
+            const auto& macro_cached = uploaded_macro_code[mid_method.value()];
+            const auto rebased_method = method - mid_method.value();
+            auto& code = uploaded_macro_code[method];
+            code.resize(macro_cached.size() - rebased_method);
+            std::memcpy(code.data(), macro_cached.data() + rebased_method,
+                        code.size() * sizeof(u32));
+            cache_info.hash = boost::hash_value(code);
+            cache_info.lle_program = Compile(code);
+        }
+
+        auto hle_program = hle_macros->GetHLEProgram(cache_info.hash);
+        if (hle_program.has_value()) {
+            cache_info.has_hle_program = true;
+            cache_info.hle_program = std::move(hle_program.value());
+            cache_info.hle_program->Execute(parameters, method);
+        } else {
+            cache_info.lle_program->Execute(parameters, method);
+        }
     }
 }

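The rewritten Execute also handles calls that land in the middle of an already-uploaded macro: it scans the uploaded blobs for one whose range contains `method`, copies the tail starting at that offset into its own entry, and compiles that. A simplified standalone model of the lookup and rebase (std::map and the helper names stand in for the real unordered_map-based code):

    #include <cstdint>
    #include <map>
    #include <optional>
    #include <vector>

    using MacroCode = std::vector<std::uint32_t>;

    std::optional<std::uint32_t> FindEnclosingMacro(
        const std::map<std::uint32_t, MacroCode>& uploaded, std::uint32_t method) {
        for (const auto& [method_base, code] : uploaded) {
            if (method >= method_base && (method - method_base) < code.size()) {
                return method_base; // `method` starts inside this blob
            }
        }
        return std::nullopt; // truly unknown: yuzu hits UNREACHABLE_MSG
    }

    MacroCode RebaseCode(const MacroCode& blob, std::uint32_t offset) {
        // Equivalent to the resize + memcpy in macro.cpp above.
        return MacroCode(blob.begin() + offset, blob.end());
    }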
diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h
index b76ed891f..4d00b84b0 100644
--- a/src/video_core/macro/macro.h
+++ b/src/video_core/macro/macro.h
@@ -11,9 +11,11 @@
 #include "common/common_types.h"

 namespace Tegra {
+
 namespace Engines {
 class Maxwell3D;
 }
+
 namespace Macro {
 constexpr std::size_t NUM_MACRO_REGISTERS = 8;
 enum class Operation : u32 {
@@ -94,6 +96,8 @@ union MethodAddress {

 } // namespace Macro

+class HLEMacro;
+
 class CachedMacro {
 public:
     virtual ~CachedMacro() = default;
@@ -107,20 +111,29 @@ public:

 class MacroEngine {
 public:
-    virtual ~MacroEngine() = default;
+    explicit MacroEngine(Engines::Maxwell3D& maxwell3d);
+    virtual ~MacroEngine();

     // Store the uploaded macro code to compile them when they're called.
     void AddCode(u32 method, u32 data);

     // Compiles the macro if its not in the cache, and executes the compiled macro
-    void Execute(u32 method, const std::vector<u32>& parameters);
+    void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters);

 protected:
     virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0;

 private:
-    std::unordered_map<u32, std::unique_ptr<CachedMacro>> macro_cache;
+    struct CacheInfo {
+        std::unique_ptr<CachedMacro> lle_program{};
+        std::unique_ptr<CachedMacro> hle_program{};
+        u64 hash{};
+        bool has_hle_program{};
+    };
+
+    std::unordered_map<u32, CacheInfo> macro_cache;
     std::unordered_map<u32, std::vector<u32>> uploaded_macro_code;
+    std::unique_ptr<HLEMacro> hle_macros;
 };

 std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d);
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
new file mode 100644
index 000000000..410f99018
--- /dev/null
+++ b/src/video_core/macro/macro_hle.cpp
@@ -0,0 +1,113 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include <vector>
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/macro/macro_hle.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace Tegra {
+
+namespace {
+// HLE'd functions
+static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d,
+                                 const std::vector<u32>& parameters) {
+    const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B);
+
+    maxwell3d.regs.draw.topology.Assign(
+        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] &
+                                                                        ~(0x3ffffff << 26)));
+    maxwell3d.regs.vb_base_instance = parameters[5];
+    maxwell3d.mme_draw.instance_count = instance_count;
+    maxwell3d.regs.vb_element_base = parameters[3];
+    maxwell3d.regs.index_array.count = parameters[1];
+    maxwell3d.regs.index_array.first = parameters[4];
+
+    if (maxwell3d.ShouldExecute()) {
+        maxwell3d.GetRasterizer().Draw(true, true);
+    }
+    maxwell3d.regs.index_array.count = 0;
+    maxwell3d.mme_draw.instance_count = 0;
+    maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
+}
+
+static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d,
+                                 const std::vector<u32>& parameters) {
+    const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
+
+    maxwell3d.regs.vertex_buffer.first = parameters[3];
+    maxwell3d.regs.vertex_buffer.count = parameters[1];
+    maxwell3d.regs.vb_base_instance = parameters[4];
+    maxwell3d.regs.draw.topology.Assign(
+        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
+    maxwell3d.mme_draw.instance_count = count;
+
+    if (maxwell3d.ShouldExecute()) {
+        maxwell3d.GetRasterizer().Draw(false, true);
+    }
+    maxwell3d.regs.vertex_buffer.count = 0;
+    maxwell3d.mme_draw.instance_count = 0;
+    maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
+}
+
+static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d,
+                                 const std::vector<u32>& parameters) {
+    const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
+    const u32 element_base = parameters[4];
+    const u32 base_instance = parameters[5];
+    maxwell3d.regs.index_array.first = parameters[3];
+    maxwell3d.regs.reg_array[0x446] = element_base; // vertex id base?
+    maxwell3d.regs.index_array.count = parameters[1];
+    maxwell3d.regs.vb_element_base = element_base;
+    maxwell3d.regs.vb_base_instance = base_instance;
+    maxwell3d.mme_draw.instance_count = instance_count;
+    maxwell3d.CallMethodFromMME(0x8e3, 0x640);
+    maxwell3d.CallMethodFromMME(0x8e4, element_base);
+    maxwell3d.CallMethodFromMME(0x8e5, base_instance);
+    maxwell3d.regs.draw.topology.Assign(
+        static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
+    if (maxwell3d.ShouldExecute()) {
+        maxwell3d.GetRasterizer().Draw(true, true);
+    }
+    maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base?
+    maxwell3d.regs.index_array.count = 0;
+    maxwell3d.regs.vb_element_base = 0x0;
+    maxwell3d.regs.vb_base_instance = 0x0;
+    maxwell3d.mme_draw.instance_count = 0;
+    maxwell3d.CallMethodFromMME(0x8e3, 0x640);
+    maxwell3d.CallMethodFromMME(0x8e4, 0x0);
+    maxwell3d.CallMethodFromMME(0x8e5, 0x0);
+    maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
+}
+} // namespace
+
+constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
+    std::make_pair<u64, HLEFunction>(0x771BB18C62444DA0, &HLE_771BB18C62444DA0),
+    std::make_pair<u64, HLEFunction>(0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD),
+    std::make_pair<u64, HLEFunction>(0x0217920100488FF7, &HLE_0217920100488FF7),
+}};
+
+HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
+HLEMacro::~HLEMacro() = default;
+
+std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const {
+    const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(),
+                                 [hash](const auto& pair) { return pair.first == hash; });
+    if (it == hle_funcs.end()) {
+        return std::nullopt;
+    }
+    return std::make_unique<HLEMacroImpl>(maxwell3d, it->second);
+}
+
+HLEMacroImpl::~HLEMacroImpl() = default;
+
+HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func)
+    : maxwell3d(maxwell3d), func(func) {}
+
+void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) {
+    func(maxwell3d, parameters);
+}
+
+} // namespace Tegra
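Matching is content-based: the engine hashes a macro's code words with boost::hash_value (see macro.cpp above) and looks that hash up in hle_funcs, so a known macro is recognized regardless of which method slot a game uploads it to. The hash computation in isolation, as a standalone sketch:

    #include <cstdint>
    #include <vector>

    #include <boost/container_hash/hash.hpp>

    std::uint64_t MacroHash(const std::vector<std::uint32_t>& code) {
        // Same helper macro.cpp uses to fill CacheInfo::hash.
        return boost::hash_value(code);
    }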
diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h
new file mode 100644
index 000000000..37af875a0
--- /dev/null
+++ b/src/video_core/macro/macro_hle.h
@@ -0,0 +1,44 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <vector>
+#include "common/common_types.h"
+#include "video_core/macro/macro.h"
+
+namespace Tegra {
+
+namespace Engines {
+class Maxwell3D;
+}
+
+using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters);
+
+class HLEMacro {
+public:
+    explicit HLEMacro(Engines::Maxwell3D& maxwell3d);
+    ~HLEMacro();
+
+    std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const;
+
+private:
+    Engines::Maxwell3D& maxwell3d;
+};
+
+class HLEMacroImpl : public CachedMacro {
+public:
+    explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func);
+    ~HLEMacroImpl();
+
+    void Execute(const std::vector<u32>& parameters, u32 method) override;
+
+private:
+    Engines::Maxwell3D& maxwell3d;
+    HLEFunction func;
+};
+
+} // namespace Tegra
diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp
index 5edff27aa..aa5256419 100644
--- a/src/video_core/macro/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -11,7 +11,8 @@
 MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));

 namespace Tegra {
-MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
+MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d)
+    : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}

 std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
     return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index 389b58989..07292702f 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -28,7 +28,8 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
     BRANCH_HOLDER,
 });

-MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
+MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d)
+    : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}

 std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
     return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
@@ -553,7 +554,7 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
 }

 void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
-    auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) {
+    const auto SetRegister = [this](u32 reg, const Xbyak::Reg32& result) {
         // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
         // register.
         if (reg == 0) {
@@ -561,7 +562,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3
         }
         mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result);
     };
-    auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); };
+    const auto SetMethodAddress = [this](const Xbyak::Reg32& reg) { mov(METHOD_ADDRESS, reg); };

     switch (operation) {
     case Macro::ResultOperation::IgnoreAndFetch:
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index dbee9f634..ff5505d12 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -210,10 +210,11 @@ bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t si
     return range == inner_size;
 }

-void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const {
+void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer,
+                              const std::size_t size) const {
     std::size_t remaining_size{size};
-    std::size_t page_index{src_addr >> page_bits};
-    std::size_t page_offset{src_addr & page_mask};
+    std::size_t page_index{gpu_src_addr >> page_bits};
+    std::size_t page_offset{gpu_src_addr & page_mask};

     auto& memory = system.Memory();

@@ -234,11 +235,11 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
     }
 }

-void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
+void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
                                     const std::size_t size) const {
     std::size_t remaining_size{size};
-    std::size_t page_index{src_addr >> page_bits};
-    std::size_t page_offset{src_addr & page_mask};
+    std::size_t page_index{gpu_src_addr >> page_bits};
+    std::size_t page_offset{gpu_src_addr & page_mask};

     auto& memory = system.Memory();

@@ -259,10 +260,11 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
     }
 }

-void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size) {
+void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer,
+                               const std::size_t size) {
     std::size_t remaining_size{size};
-    std::size_t page_index{dest_addr >> page_bits};
-    std::size_t page_offset{dest_addr & page_mask};
+    std::size_t page_index{gpu_dest_addr >> page_bits};
+    std::size_t page_offset{gpu_dest_addr & page_mask};

     auto& memory = system.Memory();

@@ -283,11 +285,11 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
     }
 }

-void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
+void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer,
                                      const std::size_t size) {
     std::size_t remaining_size{size};
-    std::size_t page_index{dest_addr >> page_bits};
-    std::size_t page_offset{dest_addr & page_mask};
+    std::size_t page_index{gpu_dest_addr >> page_bits};
+    std::size_t page_offset{gpu_dest_addr & page_mask};

     auto& memory = system.Memory();

@@ -306,16 +308,18 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
     }
 }

-void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
+void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
+                              const std::size_t size) {
     std::vector<u8> tmp_buffer(size);
-    ReadBlock(src_addr, tmp_buffer.data(), size);
-    WriteBlock(dest_addr, tmp_buffer.data(), size);
+    ReadBlock(gpu_src_addr, tmp_buffer.data(), size);
+    WriteBlock(gpu_dest_addr, tmp_buffer.data(), size);
 }

-void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
+void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
+                                    const std::size_t size) {
     std::vector<u8> tmp_buffer(size);
-    ReadBlockUnsafe(src_addr, tmp_buffer.data(), size);
-    WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
+    ReadBlockUnsafe(gpu_src_addr, tmp_buffer.data(), size);
+    WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size);
 }

 bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
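All of the renamed block functions share one page-walk shape: split the GPU virtual range at page boundaries, then read or write each piece through host memory. A minimal standalone sketch of the splitting loop; the hunks above only show the variable setup, so the loop body and page size here are reconstructed for illustration:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>

    constexpr std::size_t page_bits = 16; // illustrative: 64 KiB GPU pages
    constexpr std::size_t page_size = std::size_t{1} << page_bits;
    constexpr std::size_t page_mask = page_size - 1;

    template <typename CopyFn> // CopyFn(page_index, page_offset, copy_amount)
    void WalkBlock(std::uint64_t gpu_src_addr, std::size_t size, CopyFn&& copy) {
        std::size_t remaining_size = size;
        std::size_t page_index = gpu_src_addr >> page_bits;
        std::size_t page_offset = gpu_src_addr & page_mask;
        while (remaining_size > 0) {
            // Never cross a page boundary in a single copy.
            const std::size_t copy_amount = std::min(page_size - page_offset, remaining_size);
            copy(page_index, page_offset, copy_amount);
            ++page_index;
            page_offset = 0;
            remaining_size -= copy_amount;
        }
    }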
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 0ddd52d5a..87658e87a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -79,9 +79,9 @@ public:
      * in the Host Memory counterpart. Note: This functions cause Host GPU Memory
      * Flushes and Invalidations, respectively to each operation.
      */
-    void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
-    void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
-    void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
+    void ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
+    void WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
+    void CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);

     /**
      * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
@@ -93,9 +93,9 @@ public:
      * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
      * being flushed.
      */
-    void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const;
-    void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
-    void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
+    void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
+    void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
+    void CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);

     /**
      * IsGranularRange checks if a gpu region can be simply read with a pointer
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index ad0577a4f..e461e4c70 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -22,21 +22,53 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; | |||
| 22 | 22 | ||
| 23 | MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); | 23 | MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); |
| 24 | 24 | ||
| 25 | Buffer::Buffer(VAddr cpu_addr, const std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { | 25 | Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size) |
| 26 | : VideoCommon::BufferBlock{cpu_addr, size} { | ||
| 26 | gl_buffer.Create(); | 27 | gl_buffer.Create(); |
| 27 | glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); | 28 | glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); |
| 29 | if (device.HasVertexBufferUnifiedMemory()) { | ||
| 30 | glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); | ||
| 31 | glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); | ||
| 32 | } | ||
| 28 | } | 33 | } |
| 29 | 34 | ||
| 30 | Buffer::~Buffer() = default; | 35 | Buffer::~Buffer() = default; |
| 31 | 36 | ||
| 37 | void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { | ||
| 38 | glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size), | ||
| 39 | data); | ||
| 40 | } | ||
| 41 | |||
| 42 | void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { | ||
| 43 | MICROPROFILE_SCOPE(OpenGL_Buffer_Download); | ||
| 44 | const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); | ||
| 45 | const GLintptr gl_offset = static_cast<GLintptr>(offset); | ||
| 46 | if (read_buffer.handle == 0) { | ||
| 47 | read_buffer.Create(); | ||
| 48 | glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr, | ||
| 49 | GL_STREAM_READ); | ||
| 50 | } | ||
| 51 | glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); | ||
| 52 | glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size); | ||
| 53 | glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data); | ||
| 54 | } | ||
| 55 | |||
| 56 | void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | ||
| 57 | std::size_t size) { | ||
| 58 | glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset), | ||
| 59 | static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); | ||
| 60 | } | ||
| 61 | |||
| 32 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, | 62 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| 33 | const Device& device, std::size_t stream_size) | 63 | const Device& device_, std::size_t stream_size) |
| 34 | : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} { | 64 | : GenericBufferCache{rasterizer, system, |
| 65 | std::make_unique<OGLStreamBuffer>(device_, stream_size, true)}, | ||
| 66 | device{device_} { | ||
| 35 | if (!device.HasFastBufferSubData()) { | 67 | if (!device.HasFastBufferSubData()) { |
| 36 | return; | 68 | return; |
| 37 | } | 69 | } |
| 38 | 70 | ||
| 39 | static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); | 71 | static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); |
| 40 | glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); | 72 | glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); |
| 41 | for (const GLuint cbuf : cbufs) { | 73 | for (const GLuint cbuf : cbufs) { |
| 42 | glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); | 74 | glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); |
| @@ -48,39 +80,20 @@ OGLBufferCache::~OGLBufferCache() { | |||
| 48 | } | 80 | } |
| 49 | 81 | ||
| 50 | std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | 82 | std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { |
| 51 | return std::make_shared<Buffer>(cpu_addr, size); | 83 | return std::make_shared<Buffer>(device, cpu_addr, size); |
| 52 | } | 84 | } |
| 53 | 85 | ||
| 54 | GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { | 86 | OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) { |
| 55 | return 0; | 87 | return {0, 0, 0}; |
| 56 | } | ||
| 57 | |||
| 58 | void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||
| 59 | const u8* data) { | ||
| 60 | glNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset), | ||
| 61 | static_cast<GLsizeiptr>(size), data); | ||
| 62 | } | ||
| 63 | |||
| 64 | void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||
| 65 | u8* data) { | ||
| 66 | MICROPROFILE_SCOPE(OpenGL_Buffer_Download); | ||
| 67 | glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); | ||
| 68 | glGetNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset), | ||
| 69 | static_cast<GLsizeiptr>(size), data); | ||
| 70 | } | ||
| 71 | |||
| 72 | void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, | ||
| 73 | std::size_t dst_offset, std::size_t size) { | ||
| 74 | glCopyNamedBufferSubData(src.Handle(), dst.Handle(), static_cast<GLintptr>(src_offset), | ||
| 75 | static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); | ||
| 76 | } | 88 | } |
| 77 | 89 | ||
| 78 | OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, | 90 | OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, |
| 79 | std::size_t size) { | 91 | std::size_t size) { |
| 80 | DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); | 92 | DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); |
| 81 | const GLuint cbuf = cbufs[cbuf_cursor++]; | 93 | const GLuint cbuf = cbufs[cbuf_cursor++]; |
| 94 | |||
| 82 | glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); | 95 | glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); |
| 83 | return {cbuf, 0}; | 96 | return {cbuf, 0, 0}; |
| 84 | } | 97 | } |
| 85 | 98 | ||
| 86 | } // namespace OpenGL | 99 | } // namespace OpenGL |
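Buffer::Download above now stages reads through a lazily created GL_STREAM_READ buffer instead of calling glGetNamedBufferSubData on the live buffer, so the readback does not synchronize against a buffer the GPU may still be writing. A minimal sketch of that staging pattern, assuming an OpenGL 4.5 (DSA) context loaded through glad; the type and member names are illustrative:

    // Staged readback: copy into a reusable GL_STREAM_READ buffer, then read
    // the copy back to the CPU. Mirrors the Download() flow in the diff above.
    #include <cstddef>
    #include <glad/glad.h>

    struct StagedDownload {
        GLuint source = 0;       // live buffer the GPU renders from
        GLuint staging = 0;      // lazily created readback target
        GLsizeiptr capacity = 0; // size of 'source'

        void Download(std::size_t offset, std::size_t size, void* out) {
            if (staging == 0) {
                glCreateBuffers(1, &staging);
                glNamedBufferData(staging, capacity, nullptr, GL_STREAM_READ);
            }
            // Make prior shader writes visible to the copy below.
            glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
            glCopyNamedBufferSubData(source, staging,
                                     static_cast<GLintptr>(offset),
                                     static_cast<GLintptr>(offset),
                                     static_cast<GLsizeiptr>(size));
            glGetNamedBufferSubData(staging, static_cast<GLintptr>(offset),
                                    static_cast<GLsizeiptr>(size), out);
        }
    };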
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a49aaf9c4..88fdc0536 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -25,15 +25,28 @@ class RasterizerOpenGL; | |||
| 25 | 25 | ||
| 26 | class Buffer : public VideoCommon::BufferBlock { | 26 | class Buffer : public VideoCommon::BufferBlock { |
| 27 | public: | 27 | public: |
| 28 | explicit Buffer(VAddr cpu_addr, const std::size_t size); | 28 | explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size); |
| 29 | ~Buffer(); | 29 | ~Buffer(); |
| 30 | 30 | ||
| 31 | GLuint Handle() const { | 31 | void Upload(std::size_t offset, std::size_t size, const u8* data); |
| 32 | |||
| 33 | void Download(std::size_t offset, std::size_t size, u8* data); | ||
| 34 | |||
| 35 | void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | ||
| 36 | std::size_t size); | ||
| 37 | |||
| 38 | GLuint Handle() const noexcept { | ||
| 32 | return gl_buffer.handle; | 39 | return gl_buffer.handle; |
| 33 | } | 40 | } |
| 34 | 41 | ||
| 42 | u64 Address() const noexcept { | ||
| 43 | return gpu_address; | ||
| 44 | } | ||
| 45 | |||
| 35 | private: | 46 | private: |
| 36 | OGLBuffer gl_buffer; | 47 | OGLBuffer gl_buffer; |
| 48 | OGLBuffer read_buffer; | ||
| 49 | u64 gpu_address = 0; | ||
| 37 | }; | 50 | }; |
| 38 | 51 | ||
| 39 | using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; | 52 | using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; |
| @@ -43,7 +56,7 @@ public: | |||
| 43 | const Device& device, std::size_t stream_size); | 56 | const Device& device, std::size_t stream_size); |
| 44 | ~OGLBufferCache(); | 57 | ~OGLBufferCache(); |
| 45 | 58 | ||
| 46 | GLuint GetEmptyBuffer(std::size_t) override; | 59 | BufferInfo GetEmptyBuffer(std::size_t) override; |
| 47 | 60 | ||
| 48 | void Acquire() noexcept { | 61 | void Acquire() noexcept { |
| 49 | cbuf_cursor = 0; | 62 | cbuf_cursor = 0; |
| @@ -52,22 +65,16 @@ public: | |||
| 52 | protected: | 65 | protected: |
| 53 | std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; | 66 | std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; |
| 54 | 67 | ||
| 55 | void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||
| 56 | const u8* data) override; | ||
| 57 | |||
| 58 | void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||
| 59 | u8* data) override; | ||
| 60 | |||
| 61 | void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, | ||
| 62 | std::size_t dst_offset, std::size_t size) override; | ||
| 63 | |||
| 64 | BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; | 68 | BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; |
| 65 | 69 | ||
| 66 | private: | 70 | private: |
| 71 | static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * | ||
| 72 | Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; | ||
| 73 | |||
| 74 | const Device& device; | ||
| 75 | |||
| 67 | std::size_t cbuf_cursor = 0; | 76 | std::size_t cbuf_cursor = 0; |
| 68 | std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * | 77 | std::array<GLuint, NUM_CBUFS> cbufs{}; |
| 69 | Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram> | ||
| 70 | cbufs; | ||
| 71 | }; | 78 | }; |
| 72 | 79 | ||
| 73 | } // namespace OpenGL | 80 | } // namespace OpenGL |
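The NUM_CBUFS constant introduced above sizes the staging array for the worst case of one buffer per const-buffer slot per shader program. Assuming the usual Maxwell limits of 18 const buffers and 6 shader programs (stated here as assumptions, not taken from the diff), the arithmetic works out as:

    // Hypothetical stand-ins for the Maxwell3D::Regs limits.
    #include <cstddef>

    constexpr std::size_t MaxConstBuffers = 18;  // assumed Maxwell limit
    constexpr std::size_t MaxShaderProgram = 6;  // assumed Maxwell limit
    constexpr std::size_t NUM_CBUFS = MaxConstBuffers * MaxShaderProgram;
    static_assert(NUM_CBUFS == 108, "one staging cbuf per slot per program");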
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b31d604e4..208fc6167 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -178,7 +178,7 @@ bool IsASTCSupported() { | |||
| 178 | for (const GLenum format : formats) { | 178 | for (const GLenum format : formats) { |
| 179 | for (const GLenum support : required_support) { | 179 | for (const GLenum support : required_support) { |
| 180 | GLint value; | 180 | GLint value; |
| 181 | glGetInternalformativ(GL_TEXTURE_2D, format, support, 1, &value); | 181 | glGetInternalformativ(target, format, support, 1, &value); |
| 182 | if (value != GL_FULL_SUPPORT) { | 182 | if (value != GL_FULL_SUPPORT) { |
| 183 | return false; | 183 | return false; |
| 184 | } | 184 | } |
| @@ -193,6 +193,7 @@ bool IsASTCSupported() { | |||
| 193 | Device::Device() | 193 | Device::Device() |
| 194 | : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { | 194 | : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { |
| 195 | const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); | 195 | const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); |
| 196 | const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER)); | ||
| 196 | const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); | 197 | const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); |
| 197 | const std::vector extensions = GetExtensions(); | 198 | const std::vector extensions = GetExtensions(); |
| 198 | 199 | ||
| @@ -216,12 +217,18 @@ Device::Device() | |||
| 216 | has_shader_ballot = GLAD_GL_ARB_shader_ballot; | 217 | has_shader_ballot = GLAD_GL_ARB_shader_ballot; |
| 217 | has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; | 218 | has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; |
| 218 | has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); | 219 | has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); |
| 220 | has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod"); | ||
| 219 | has_astc = IsASTCSupported(); | 221 | has_astc = IsASTCSupported(); |
| 220 | has_variable_aoffi = TestVariableAoffi(); | 222 | has_variable_aoffi = TestVariableAoffi(); |
| 221 | has_component_indexing_bug = is_amd; | 223 | has_component_indexing_bug = is_amd; |
| 222 | has_precise_bug = TestPreciseBug(); | 224 | has_precise_bug = TestPreciseBug(); |
| 223 | has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; | ||
| 224 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; | 225 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; |
| 226 | has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; | ||
| 227 | |||
| 228 | // At the time of writing, only Nvidia's driver optimizes BufferSubData on exclusive | ||
| 229 | // uniform buffers as "push constants". | ||
| 230 | has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; | ||
| 231 | |||
| 225 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && | 232 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && |
| 226 | GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && | 233 | GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && |
| 227 | GLAD_GL_NV_transform_feedback2; | 234 | GLAD_GL_NV_transform_feedback2; |
| @@ -245,6 +252,7 @@ Device::Device(std::nullptr_t) { | |||
| 245 | has_shader_ballot = true; | 252 | has_shader_ballot = true; |
| 246 | has_vertex_viewport_layer = true; | 253 | has_vertex_viewport_layer = true; |
| 247 | has_image_load_formatted = true; | 254 | has_image_load_formatted = true; |
| 255 | has_texture_shadow_lod = true; | ||
| 248 | has_variable_aoffi = true; | 256 | has_variable_aoffi = true; |
| 249 | } | 257 | } |
| 250 | 258 | ||
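The has_fast_buffer_sub_data flag gates a path that streams small uniform data into per-slot exclusive buffers with glNamedBufferSubData, which (per the comment in the hunk above) Nvidia's driver can promote to push-constant-like updates. A hedged sketch of that upload style, assuming a pre-created pool of buffer handles:

    // Round-robin over exclusive uniform buffers; each upload touches a
    // buffer that is never bound anywhere else, which is what lets the
    // driver optimize it. Pool creation is omitted.
    #include <cstddef>
    #include <glad/glad.h>

    GLuint UploadFastConstBuffer(GLuint* cbufs, std::size_t& cursor,
                                 const void* data, GLsizeiptr size) {
        const GLuint cbuf = cbufs[cursor++];
        glNamedBufferSubData(cbuf, 0, size, data);
        return cbuf; // bind with offset 0
    }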
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 145347943..e1d811966 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -68,6 +68,14 @@ public: | |||
| 68 | return has_image_load_formatted; | 68 | return has_image_load_formatted; |
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | bool HasTextureShadowLod() const { | ||
| 72 | return has_texture_shadow_lod; | ||
| 73 | } | ||
| 74 | |||
| 75 | bool HasVertexBufferUnifiedMemory() const { | ||
| 76 | return has_vertex_buffer_unified_memory; | ||
| 77 | } | ||
| 78 | |||
| 71 | bool HasASTC() const { | 79 | bool HasASTC() const { |
| 72 | return has_astc; | 80 | return has_astc; |
| 73 | } | 81 | } |
| @@ -110,6 +118,8 @@ private: | |||
| 110 | bool has_shader_ballot{}; | 118 | bool has_shader_ballot{}; |
| 111 | bool has_vertex_viewport_layer{}; | 119 | bool has_vertex_viewport_layer{}; |
| 112 | bool has_image_load_formatted{}; | 120 | bool has_image_load_formatted{}; |
| 121 | bool has_texture_shadow_lod{}; | ||
| 122 | bool has_vertex_buffer_unified_memory{}; | ||
| 113 | bool has_astc{}; | 123 | bool has_astc{}; |
| 114 | bool has_variable_aoffi{}; | 124 | bool has_variable_aoffi{}; |
| 115 | bool has_component_indexing_bug{}; | 125 | bool has_component_indexing_bug{}; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2d6c11320..e960a0ef1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -61,7 +61,8 @@ constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = | |||
| 61 | constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = | 61 | constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = |
| 62 | NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; | 62 | NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; |
| 63 | 63 | ||
| 64 | constexpr std::size_t NumSupportedVertexAttributes = 16; | 64 | constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; |
| 65 | constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; | ||
| 65 | 66 | ||
| 66 | template <typename Engine, typename Entry> | 67 | template <typename Engine, typename Entry> |
| 67 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, | 68 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, |
| @@ -193,7 +194,7 @@ void RasterizerOpenGL::SetupVertexFormat() { | |||
| 193 | // avoid OpenGL errors. | 194 | // avoid OpenGL errors. |
| 194 | // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't | 195 | // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't |
| 195 | // assume every shader uses them all. | 196 | // assume every shader uses them all. |
| 196 | for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { | 197 | for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) { |
| 197 | if (!flags[Dirty::VertexFormat0 + index]) { | 198 | if (!flags[Dirty::VertexFormat0 + index]) { |
| 198 | continue; | 199 | continue; |
| 199 | } | 200 | } |
| @@ -212,9 +213,10 @@ void RasterizerOpenGL::SetupVertexFormat() { | |||
| 212 | if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt || | 213 | if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt || |
| 213 | attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) { | 214 | attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) { |
| 214 | glVertexAttribIFormat(gl_index, attrib.ComponentCount(), | 215 | glVertexAttribIFormat(gl_index, attrib.ComponentCount(), |
| 215 | MaxwellToGL::VertexType(attrib), attrib.offset); | 216 | MaxwellToGL::VertexFormat(attrib), attrib.offset); |
| 216 | } else { | 217 | } else { |
| 217 | glVertexAttribFormat(gl_index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), | 218 | glVertexAttribFormat(gl_index, attrib.ComponentCount(), |
| 219 | MaxwellToGL::VertexFormat(attrib), | ||
| 218 | attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); | 220 | attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); |
| 219 | } | 221 | } |
| 220 | glVertexAttribBinding(gl_index, attrib.buffer); | 222 | glVertexAttribBinding(gl_index, attrib.buffer); |
| @@ -231,9 +233,11 @@ void RasterizerOpenGL::SetupVertexBuffer() { | |||
| 231 | 233 | ||
| 232 | MICROPROFILE_SCOPE(OpenGL_VB); | 234 | MICROPROFILE_SCOPE(OpenGL_VB); |
| 233 | 235 | ||
| 236 | const bool use_unified_memory = device.HasVertexBufferUnifiedMemory(); | ||
| 237 | |||
| 234 | // Upload all guest vertex arrays sequentially to our buffer | 238 | // Upload all guest vertex arrays sequentially to our buffer |
| 235 | const auto& regs = gpu.regs; | 239 | const auto& regs = gpu.regs; |
| 236 | for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | 240 | for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) { |
| 237 | if (!flags[Dirty::VertexBuffer0 + index]) { | 241 | if (!flags[Dirty::VertexBuffer0 + index]) { |
| 238 | continue; | 242 | continue; |
| 239 | } | 243 | } |
| @@ -246,16 +250,25 @@ void RasterizerOpenGL::SetupVertexBuffer() { | |||
| 246 | 250 | ||
| 247 | const GPUVAddr start = vertex_array.StartAddress(); | 251 | const GPUVAddr start = vertex_array.StartAddress(); |
| 248 | const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); | 252 | const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); |
| 249 | |||
| 250 | ASSERT(end >= start); | 253 | ASSERT(end >= start); |
| 254 | |||
| 255 | const GLuint gl_index = static_cast<GLuint>(index); | ||
| 251 | const u64 size = end - start; | 256 | const u64 size = end - start; |
| 252 | if (size == 0) { | 257 | if (size == 0) { |
| 253 | glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride); | 258 | glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); |
| 259 | if (use_unified_memory) { | ||
| 260 | glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0); | ||
| 261 | } | ||
| 254 | continue; | 262 | continue; |
| 255 | } | 263 | } |
| 256 | const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); | 264 | const auto info = buffer_cache.UploadMemory(start, size); |
| 257 | glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset, | 265 | if (use_unified_memory) { |
| 258 | vertex_array.stride); | 266 | glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); |
| 267 | glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, | ||
| 268 | info.address + info.offset, size); | ||
| 269 | } else { | ||
| 270 | glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride); | ||
| 271 | } | ||
| 259 | } | 272 | } |
| 260 | } | 273 | } |
| 261 | 274 | ||
| @@ -268,7 +281,7 @@ void RasterizerOpenGL::SetupVertexInstances() { | |||
| 268 | flags[Dirty::VertexInstances] = false; | 281 | flags[Dirty::VertexInstances] = false; |
| 269 | 282 | ||
| 270 | const auto& regs = gpu.regs; | 283 | const auto& regs = gpu.regs; |
| 271 | for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { | 284 | for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) { |
| 272 | if (!flags[Dirty::VertexInstance0 + index]) { | 285 | if (!flags[Dirty::VertexInstance0 + index]) { |
| 273 | continue; | 286 | continue; |
| 274 | } | 287 | } |
| @@ -285,9 +298,9 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { | |||
| 285 | MICROPROFILE_SCOPE(OpenGL_Index); | 298 | MICROPROFILE_SCOPE(OpenGL_Index); |
| 286 | const auto& regs = system.GPU().Maxwell3D().regs; | 299 | const auto& regs = system.GPU().Maxwell3D().regs; |
| 287 | const std::size_t size = CalculateIndexBufferSize(); | 300 | const std::size_t size = CalculateIndexBufferSize(); |
| 288 | const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); | 301 | const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); |
| 289 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer); | 302 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle); |
| 290 | return offset; | 303 | return info.offset; |
| 291 | } | 304 | } |
| 292 | 305 | ||
| 293 | void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | 306 | void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { |
| @@ -643,9 +656,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 643 | if (!device.UseAssemblyShaders()) { | 656 | if (!device.UseAssemblyShaders()) { |
| 644 | MaxwellUniformData ubo; | 657 | MaxwellUniformData ubo; |
| 645 | ubo.SetFromRegs(gpu); | 658 | ubo.SetFromRegs(gpu); |
| 646 | const auto [buffer, offset] = | 659 | const auto info = |
| 647 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); | 660 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); |
| 648 | glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, | 661 | glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset, |
| 649 | static_cast<GLsizeiptr>(sizeof(ubo))); | 662 | static_cast<GLsizeiptr>(sizeof(ubo))); |
| 650 | } | 663 | } |
| 651 | 664 | ||
| @@ -956,8 +969,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, | |||
| 956 | if (device.UseAssemblyShaders()) { | 969 | if (device.UseAssemblyShaders()) { |
| 957 | glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); | 970 | glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); |
| 958 | } else { | 971 | } else { |
| 959 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, | 972 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float)); |
| 960 | buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); | ||
| 961 | } | 973 | } |
| 962 | return; | 974 | return; |
| 963 | } | 975 | } |
| @@ -970,24 +982,25 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, | |||
| 970 | 982 | ||
| 971 | const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); | 983 | const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); |
| 972 | const GPUVAddr gpu_addr = buffer.address; | 984 | const GPUVAddr gpu_addr = buffer.address; |
| 973 | auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); | 985 | auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); |
| 974 | 986 | ||
| 975 | if (device.UseAssemblyShaders()) { | 987 | if (device.UseAssemblyShaders()) { |
| 976 | UNIMPLEMENTED_IF(use_unified); | 988 | UNIMPLEMENTED_IF(use_unified); |
| 977 | if (offset != 0) { | 989 | if (info.offset != 0) { |
| 978 | const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; | 990 | const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; |
| 979 | glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); | 991 | glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size); |
| 980 | cbuf = staging_cbuf; | 992 | info.handle = staging_cbuf; |
| 981 | offset = 0; | 993 | info.offset = 0; |
| 982 | } | 994 | } |
| 983 | glBindBufferRangeNV(stage, binding, cbuf, offset, size); | 995 | glBindBufferRangeNV(stage, binding, info.handle, info.offset, size); |
| 984 | return; | 996 | return; |
| 985 | } | 997 | } |
| 986 | 998 | ||
| 987 | if (use_unified) { | 999 | if (use_unified) { |
| 988 | glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size); | 1000 | glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset, |
| 1001 | unified_offset, size); | ||
| 989 | } else { | 1002 | } else { |
| 990 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); | 1003 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size); |
| 991 | } | 1004 | } |
| 992 | } | 1005 | } |
| 993 | 1006 | ||
| @@ -1023,9 +1036,8 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { | |||
| 1023 | void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, | 1036 | void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, |
| 1024 | GPUVAddr gpu_addr, std::size_t size) { | 1037 | GPUVAddr gpu_addr, std::size_t size) { |
| 1025 | const auto alignment{device.GetShaderStorageBufferAlignment()}; | 1038 | const auto alignment{device.GetShaderStorageBufferAlignment()}; |
| 1026 | const auto [ssbo, buffer_offset] = | 1039 | const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); |
| 1027 | buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); | 1040 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, |
| 1028 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset, | ||
| 1029 | static_cast<GLsizeiptr>(size)); | 1041 | static_cast<GLsizeiptr>(size)); |
| 1030 | } | 1042 | } |
| 1031 | 1043 | ||
| @@ -1712,8 +1724,9 @@ void RasterizerOpenGL::EndTransformFeedback() { | |||
| 1712 | const GLuint handle = transform_feedback_buffers[index].handle; | 1724 | const GLuint handle = transform_feedback_buffers[index].handle; |
| 1713 | const GPUVAddr gpu_addr = binding.Address(); | 1725 | const GPUVAddr gpu_addr = binding.Address(); |
| 1714 | const std::size_t size = binding.buffer_size; | 1726 | const std::size_t size = binding.buffer_size; |
| 1715 | const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | 1727 | const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); |
| 1716 | glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); | 1728 | glCopyNamedBufferSubData(handle, info.handle, 0, info.offset, |
| 1729 | static_cast<GLsizeiptr>(size)); | ||
| 1717 | } | 1730 | } |
| 1718 | } | 1731 | } |
| 1719 | 1732 | ||
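SetupVertexBuffer now picks between two binding styles: the classic handle bind, or, with GL_NV_vertex_buffer_unified_memory, a zero-handle bind that only supplies the stride while glBufferAddressRangeNV points the attribute fetcher at a resident GPU address. A condensed sketch of the dispatch, assuming the buffer was made resident at creation time:

    // Bind one vertex buffer either by handle or by GPU address
    // (GL_NV_vertex_buffer_unified_memory). 'address' must come from
    // glGetNamedBufferParameterui64vNV on a resident buffer.
    #include <glad/glad.h>

    void BindVertexBuffer(GLuint index, GLuint handle, GLuint64EXT address,
                          GLintptr offset, GLsizei stride, GLsizeiptr size,
                          bool use_unified_memory) {
        if (use_unified_memory) {
            glBindVertexBuffer(index, 0, 0, stride); // keep stride, drop handle
            glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index,
                                   address + offset, size);
        } else {
            glBindVertexBuffer(index, handle, offset, stride);
        }
    }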
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 46e780a06..c6a3bf3a1 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -460,8 +460,9 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 460 | const u8* host_ptr_b = memory_manager.GetPointer(address_b); | 460 | const u8* host_ptr_b = memory_manager.GetPointer(address_b); |
| 461 | code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false); | 461 | code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false); |
| 462 | } | 462 | } |
| 463 | const std::size_t code_size = code.size() * sizeof(u64); | ||
| 463 | 464 | ||
| 464 | const auto unique_identifier = GetUniqueIdentifier( | 465 | const u64 unique_identifier = GetUniqueIdentifier( |
| 465 | GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); | 466 | GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); |
| 466 | 467 | ||
| 467 | const ShaderParameters params{system, disk_cache, device, | 468 | const ShaderParameters params{system, disk_cache, device, |
| @@ -477,7 +478,7 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 477 | 478 | ||
| 478 | Shader* const result = shader.get(); | 479 | Shader* const result = shader.get(); |
| 479 | if (cpu_addr) { | 480 | if (cpu_addr) { |
| 480 | Register(std::move(shader), *cpu_addr, code.size() * sizeof(u64)); | 481 | Register(std::move(shader), *cpu_addr, code_size); |
| 481 | } else { | 482 | } else { |
| 482 | null_shader = std::move(shader); | 483 | null_shader = std::move(shader); |
| 483 | } | 484 | } |
| @@ -495,8 +496,9 @@ Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | |||
| 495 | 496 | ||
| 496 | const auto host_ptr{memory_manager.GetPointer(code_addr)}; | 497 | const auto host_ptr{memory_manager.GetPointer(code_addr)}; |
| 497 | // No kernel found, create a new one | 498 | // No kernel found, create a new one |
| 498 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr, true)}; | 499 | ProgramCode code{GetShaderCode(memory_manager, code_addr, host_ptr, true)}; |
| 499 | const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; | 500 | const std::size_t code_size{code.size() * sizeof(u64)}; |
| 501 | const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; | ||
| 500 | 502 | ||
| 501 | const ShaderParameters params{system, disk_cache, device, | 503 | const ShaderParameters params{system, disk_cache, device, |
| 502 | *cpu_addr, host_ptr, unique_identifier}; | 504 | *cpu_addr, host_ptr, unique_identifier}; |
| @@ -511,7 +513,7 @@ Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | |||
| 511 | 513 | ||
| 512 | Shader* const result = kernel.get(); | 514 | Shader* const result = kernel.get(); |
| 513 | if (cpu_addr) { | 515 | if (cpu_addr) { |
| 514 | Register(std::move(kernel), *cpu_addr, code.size() * sizeof(u64)); | 516 | Register(std::move(kernel), *cpu_addr, code_size); |
| 515 | } else { | 517 | } else { |
| 516 | null_kernel = std::move(kernel); | 518 | null_kernel = std::move(kernel); |
| 517 | } | 519 | } |
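Hoisting code_size above the later moves is a use-after-move fix: the original read code.size() after the ProgramCode vector had been handed off to shader creation. A minimal reproduction of the hazard, with ConsumeCode standing in for that hand-off (an illustrative name, not an API from the diff):

    #include <cstddef>
    #include <cstdint>
    #include <utility>
    #include <vector>

    void ConsumeCode(std::vector<std::uint64_t> code); // stand-in for shader creation

    std::size_t RegisterShader(std::vector<std::uint64_t> code) {
        // Capture the size first; size() on a moved-from vector yields an
        // unspecified value.
        const std::size_t code_size = code.size() * sizeof(std::uint64_t);
        ConsumeCode(std::move(code));
        return code_size;
    }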
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 6848f1388..994aaeaf2 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -37,7 +37,6 @@ namespace OpenGL { | |||
| 37 | 37 | ||
| 38 | class Device; | 38 | class Device; |
| 39 | class RasterizerOpenGL; | 39 | class RasterizerOpenGL; |
| 40 | struct UnspecializedShader; | ||
| 41 | 40 | ||
| 42 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 41 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 43 | 42 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index d6e30b321..2c49aeaac 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -37,6 +37,7 @@ using Tegra::Shader::IpaMode; | |||
| 37 | using Tegra::Shader::IpaSampleMode; | 37 | using Tegra::Shader::IpaSampleMode; |
| 38 | using Tegra::Shader::PixelImap; | 38 | using Tegra::Shader::PixelImap; |
| 39 | using Tegra::Shader::Register; | 39 | using Tegra::Shader::Register; |
| 40 | using Tegra::Shader::TextureType; | ||
| 40 | using VideoCommon::Shader::BuildTransformFeedback; | 41 | using VideoCommon::Shader::BuildTransformFeedback; |
| 41 | using VideoCommon::Shader::Registry; | 42 | using VideoCommon::Shader::Registry; |
| 42 | 43 | ||
| @@ -526,6 +527,9 @@ private: | |||
| 526 | if (device.HasImageLoadFormatted()) { | 527 | if (device.HasImageLoadFormatted()) { |
| 527 | code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); | 528 | code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); |
| 528 | } | 529 | } |
| 530 | if (device.HasTextureShadowLod()) { | ||
| 531 | code.AddLine("#extension GL_EXT_texture_shadow_lod : require"); | ||
| 532 | } | ||
| 529 | if (device.HasWarpIntrinsics()) { | 533 | if (device.HasWarpIntrinsics()) { |
| 530 | code.AddLine("#extension GL_NV_gpu_shader5 : require"); | 534 | code.AddLine("#extension GL_NV_gpu_shader5 : require"); |
| 531 | code.AddLine("#extension GL_NV_shader_thread_group : require"); | 535 | code.AddLine("#extension GL_NV_shader_thread_group : require"); |
| @@ -909,13 +913,13 @@ private: | |||
| 909 | return "samplerBuffer"; | 913 | return "samplerBuffer"; |
| 910 | } | 914 | } |
| 911 | switch (sampler.type) { | 915 | switch (sampler.type) { |
| 912 | case Tegra::Shader::TextureType::Texture1D: | 916 | case TextureType::Texture1D: |
| 913 | return "sampler1D"; | 917 | return "sampler1D"; |
| 914 | case Tegra::Shader::TextureType::Texture2D: | 918 | case TextureType::Texture2D: |
| 915 | return "sampler2D"; | 919 | return "sampler2D"; |
| 916 | case Tegra::Shader::TextureType::Texture3D: | 920 | case TextureType::Texture3D: |
| 917 | return "sampler3D"; | 921 | return "sampler3D"; |
| 918 | case Tegra::Shader::TextureType::TextureCube: | 922 | case TextureType::TextureCube: |
| 919 | return "samplerCube"; | 923 | return "samplerCube"; |
| 920 | default: | 924 | default: |
| 921 | UNREACHABLE(); | 925 | UNREACHABLE(); |
| @@ -1380,8 +1384,19 @@ private: | |||
| 1380 | const std::size_t count = operation.GetOperandsCount(); | 1384 | const std::size_t count = operation.GetOperandsCount(); |
| 1381 | const bool has_array = meta->sampler.is_array; | 1385 | const bool has_array = meta->sampler.is_array; |
| 1382 | const bool has_shadow = meta->sampler.is_shadow; | 1386 | const bool has_shadow = meta->sampler.is_shadow; |
| 1387 | const bool workaround_lod_array_shadow_as_grad = | ||
| 1388 | !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow && | ||
| 1389 | ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || | ||
| 1390 | meta->sampler.type == TextureType::TextureCube); | ||
| 1391 | |||
| 1392 | std::string expr = "texture"; | ||
| 1393 | |||
| 1394 | if (workaround_lod_array_shadow_as_grad) { | ||
| 1395 | expr += "Grad"; | ||
| 1396 | } else { | ||
| 1397 | expr += function_suffix; | ||
| 1398 | } | ||
| 1383 | 1399 | ||
| 1384 | std::string expr = "texture" + function_suffix; | ||
| 1385 | if (!meta->aoffi.empty()) { | 1400 | if (!meta->aoffi.empty()) { |
| 1386 | expr += "Offset"; | 1401 | expr += "Offset"; |
| 1387 | } else if (!meta->ptp.empty()) { | 1402 | } else if (!meta->ptp.empty()) { |
| @@ -1415,6 +1430,16 @@ private: | |||
| 1415 | expr += ')'; | 1430 | expr += ')'; |
| 1416 | } | 1431 | } |
| 1417 | 1432 | ||
| 1433 | if (workaround_lod_array_shadow_as_grad) { | ||
| 1434 | switch (meta->sampler.type) { | ||
| 1435 | case TextureType::Texture2D: | ||
| 1436 | return expr + ", vec2(0.0), vec2(0.0))"; | ||
| 1437 | case TextureType::TextureCube: | ||
| 1438 | return expr + ", vec3(0.0), vec3(0.0))"; | ||
| 1439 | } | ||
| 1440 | UNREACHABLE(); | ||
| 1441 | } | ||
| 1442 | |||
| 1418 | for (const auto& variant : extras) { | 1443 | for (const auto& variant : extras) { |
| 1419 | if (const auto argument = std::get_if<TextureArgument>(&variant)) { | 1444 | if (const auto argument = std::get_if<TextureArgument>(&variant)) { |
| 1420 | expr += GenerateTextureArgument(*argument); | 1445 | expr += GenerateTextureArgument(*argument); |
| @@ -2041,8 +2066,19 @@ private: | |||
| 2041 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 2066 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 2042 | ASSERT(meta); | 2067 | ASSERT(meta); |
| 2043 | 2068 | ||
| 2044 | std::string expr = GenerateTexture( | 2069 | std::string expr{}; |
| 2045 | operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); | 2070 | |
| 2071 | if (!device.HasTextureShadowLod() && meta->sampler.is_shadow && | ||
| 2072 | ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) || | ||
| 2073 | meta->sampler.type == TextureType::TextureCube)) { | ||
| 2074 | LOG_ERROR(Render_OpenGL, | ||
| 2075 | "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround"); | ||
| 2076 | expr = GenerateTexture(operation, "Lod", {}); | ||
| 2077 | } else { | ||
| 2078 | expr = GenerateTexture(operation, "Lod", | ||
| 2079 | {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); | ||
| 2080 | } | ||
| 2081 | |||
| 2046 | if (meta->sampler.is_shadow) { | 2082 | if (meta->sampler.is_shadow) { |
| 2047 | expr = "vec4(" + expr + ')'; | 2083 | expr = "vec4(" + expr + ')'; |
| 2048 | } | 2084 | } |
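The decompiler change above works around drivers lacking GL_EXT_texture_shadow_lod, where GLSL offers no textureLod overload for 2D-array or cube shadow samplers: it emits textureGrad with zero derivatives, which resolves to the base LOD much like an explicit lod of 0. A small C++ sketch of the emitted-string selection; the GLSL argument lists are illustrative:

    #include <string>

    std::string EmitShadowLodCall(bool has_texture_shadow_lod, bool is_cube) {
        if (has_texture_shadow_lod) {
            return "textureLod(sampler, coords, lod)";
        }
        // Zero gradients approximate lod 0, matching the workaround above.
        return is_cube
                   ? "textureGrad(sampler, coords, vec3(0.0), vec3(0.0))"
                   : "textureGrad(sampler, coords, vec2(0.0), vec2(0.0))";
    }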
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 932a2f69e..3655ff629 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp | |||
| @@ -2,11 +2,13 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <deque> | 5 | #include <tuple> |
| 6 | #include <vector> | 6 | #include <vector> |
| 7 | |||
| 7 | #include "common/alignment.h" | 8 | #include "common/alignment.h" |
| 8 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 9 | #include "common/microprofile.h" | 10 | #include "common/microprofile.h" |
| 11 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 12 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| 11 | 13 | ||
| 12 | MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", | 14 | MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", |
| @@ -14,8 +16,7 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", | |||
| 14 | 16 | ||
| 15 | namespace OpenGL { | 17 | namespace OpenGL { |
| 16 | 18 | ||
| 17 | OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent, | 19 | OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage) |
| 18 | bool use_persistent) | ||
| 19 | : buffer_size(size) { | 20 | : buffer_size(size) { |
| 20 | gl_buffer.Create(); | 21 | gl_buffer.Create(); |
| 21 | 22 | ||
| @@ -29,23 +30,19 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p | |||
| 29 | allocate_size *= 2; | 30 | allocate_size *= 2; |
| 30 | } | 31 | } |
| 31 | 32 | ||
| 32 | if (use_persistent) { | 33 | static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; |
| 33 | persistent = true; | 34 | glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); |
| 34 | coherent = prefer_coherent; | 35 | mapped_ptr = static_cast<u8*>( |
| 35 | const GLbitfield flags = | 36 | glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); |
| 36 | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); | 37 | |
| 37 | glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); | 38 | if (device.HasVertexBufferUnifiedMemory()) { |
| 38 | mapped_ptr = static_cast<u8*>(glMapNamedBufferRange( | 39 | glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); |
| 39 | gl_buffer.handle, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); | 40 | glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); |
| 40 | } else { | ||
| 41 | glNamedBufferData(gl_buffer.handle, allocate_size, nullptr, GL_STREAM_DRAW); | ||
| 42 | } | 41 | } |
| 43 | } | 42 | } |
| 44 | 43 | ||
| 45 | OGLStreamBuffer::~OGLStreamBuffer() { | 44 | OGLStreamBuffer::~OGLStreamBuffer() { |
| 46 | if (persistent) { | 45 | glUnmapNamedBuffer(gl_buffer.handle); |
| 47 | glUnmapNamedBuffer(gl_buffer.handle); | ||
| 48 | } | ||
| 49 | gl_buffer.Release(); | 46 | gl_buffer.Release(); |
| 50 | } | 47 | } |
| 51 | 48 | ||
| @@ -60,36 +57,21 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a | |||
| 60 | 57 | ||
| 61 | bool invalidate = false; | 58 | bool invalidate = false; |
| 62 | if (buffer_pos + size > buffer_size) { | 59 | if (buffer_pos + size > buffer_size) { |
| 60 | MICROPROFILE_SCOPE(OpenGL_StreamBuffer); | ||
| 61 | glInvalidateBufferData(gl_buffer.handle); | ||
| 62 | |||
| 63 | buffer_pos = 0; | 63 | buffer_pos = 0; |
| 64 | invalidate = true; | 64 | invalidate = true; |
| 65 | |||
| 66 | if (persistent) { | ||
| 67 | glUnmapNamedBuffer(gl_buffer.handle); | ||
| 68 | } | ||
| 69 | } | 65 | } |
| 70 | 66 | ||
| 71 | if (invalidate || !persistent) { | 67 | return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate); |
| 72 | MICROPROFILE_SCOPE(OpenGL_StreamBuffer); | ||
| 73 | GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) | | ||
| 74 | (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) | | ||
| 75 | (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT); | ||
| 76 | mapped_ptr = static_cast<u8*>( | ||
| 77 | glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags)); | ||
| 78 | mapped_offset = buffer_pos; | ||
| 79 | } | ||
| 80 | |||
| 81 | return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate); | ||
| 82 | } | 68 | } |
| 83 | 69 | ||
| 84 | void OGLStreamBuffer::Unmap(GLsizeiptr size) { | 70 | void OGLStreamBuffer::Unmap(GLsizeiptr size) { |
| 85 | ASSERT(size <= mapped_size); | 71 | ASSERT(size <= mapped_size); |
| 86 | 72 | ||
| 87 | if (!coherent && size > 0) { | 73 | if (size > 0) { |
| 88 | glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size); | 74 | glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size); |
| 89 | } | ||
| 90 | |||
| 91 | if (!persistent) { | ||
| 92 | glUnmapNamedBuffer(gl_buffer.handle); | ||
| 93 | } | 75 | } |
| 94 | 76 | ||
| 95 | buffer_pos += size; | 77 | buffer_pos += size; |
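With the coherent and non-persistent paths removed, the stream buffer is always one persistently mapped, explicitly flushed ring: Map hands out mapped_ptr + buffer_pos, Unmap flushes exactly the written range, and overflowing the ring orphans the storage with glInvalidateBufferData rather than remapping. A condensed sketch of that ring, assuming an OpenGL 4.5 context with ARB_buffer_storage:

    #include <tuple>
    #include <glad/glad.h>

    class StreamRing {
    public:
        explicit StreamRing(GLsizeiptr size) : size_{size} {
            glCreateBuffers(1, &handle_);
            constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
            glNamedBufferStorage(handle_, size_, nullptr, flags);
            ptr_ = static_cast<GLubyte*>(glMapNamedBufferRange(
                handle_, 0, size_, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
        }
        std::tuple<GLubyte*, GLintptr, bool> Map(GLsizeiptr size) {
            bool invalidated = false;
            if (pos_ + size > size_) {           // wrap: orphan and restart
                glInvalidateBufferData(handle_);
                pos_ = 0;
                invalidated = true;
            }
            return {ptr_ + pos_, pos_, invalidated};
        }
        void Unmap(GLsizeiptr written) {         // flush only what was written
            if (written > 0) {
                glFlushMappedNamedBufferRange(handle_, pos_, written);
            }
            pos_ += written;
        }
    private:
        GLuint handle_ = 0;
        GLsizeiptr size_ = 0;
        GLintptr pos_ = 0;
        GLubyte* ptr_ = nullptr;
    };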
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 866da3594..307a67113 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h | |||
| @@ -11,10 +11,11 @@ | |||
| 11 | 11 | ||
| 12 | namespace OpenGL { | 12 | namespace OpenGL { |
| 13 | 13 | ||
| 14 | class Device; | ||
| 15 | |||
| 14 | class OGLStreamBuffer : private NonCopyable { | 16 | class OGLStreamBuffer : private NonCopyable { |
| 15 | public: | 17 | public: |
| 16 | explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false, | 18 | explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage); |
| 17 | bool use_persistent = true); | ||
| 18 | ~OGLStreamBuffer(); | 19 | ~OGLStreamBuffer(); |
| 19 | 20 | ||
| 20 | /* | 21 | /* |
| @@ -33,19 +34,20 @@ public: | |||
| 33 | return gl_buffer.handle; | 34 | return gl_buffer.handle; |
| 34 | } | 35 | } |
| 35 | 36 | ||
| 36 | GLsizeiptr Size() const { | 37 | u64 Address() const { |
| 38 | return gpu_address; | ||
| 39 | } | ||
| 40 | |||
| 41 | GLsizeiptr Size() const noexcept { | ||
| 37 | return buffer_size; | 42 | return buffer_size; |
| 38 | } | 43 | } |
| 39 | 44 | ||
| 40 | private: | 45 | private: |
| 41 | OGLBuffer gl_buffer; | 46 | OGLBuffer gl_buffer; |
| 42 | 47 | ||
| 43 | bool coherent = false; | 48 | GLuint64EXT gpu_address = 0; |
| 44 | bool persistent = false; | ||
| 45 | |||
| 46 | GLintptr buffer_pos = 0; | 49 | GLintptr buffer_pos = 0; |
| 47 | GLsizeiptr buffer_size = 0; | 50 | GLsizeiptr buffer_size = 0; |
| 48 | GLintptr mapped_offset = 0; | ||
| 49 | GLsizeiptr mapped_size = 0; | 51 | GLsizeiptr mapped_size = 0; |
| 50 | u8* mapped_ptr = nullptr; | 52 | u8* mapped_ptr = nullptr; |
| 51 | }; | 53 | }; |
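Typical call sequence against this interface, as an illustration only (the payload, alignment, and stride values are made up):

    #include <cstring>

    void StreamExample(const Device& device, const void* payload, GLsizeiptr size) {
        OGLStreamBuffer stream{device, 4 << 20, true};
        const auto [ptr, offset, invalidated] = stream.Map(size, 4);
        std::memcpy(ptr, payload, static_cast<std::size_t>(size));
        stream.Unmap(size);
        glBindVertexBuffer(0, stream.Handle(), offset, /*stride=*/16);
    }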
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 35e329240..fe9bd4b5a 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h | |||
| @@ -24,10 +24,11 @@ namespace MaxwellToGL { | |||
| 24 | 24 | ||
| 25 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 25 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 26 | 26 | ||
| 27 | inline GLenum VertexType(Maxwell::VertexAttribute attrib) { | 27 | inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { |
| 28 | switch (attrib.type) { | 28 | switch (attrib.type) { |
| 29 | case Maxwell::VertexAttribute::Type::UnsignedInt: | ||
| 30 | case Maxwell::VertexAttribute::Type::UnsignedNorm: | 29 | case Maxwell::VertexAttribute::Type::UnsignedNorm: |
| 30 | case Maxwell::VertexAttribute::Type::UnsignedScaled: | ||
| 31 | case Maxwell::VertexAttribute::Type::UnsignedInt: | ||
| 31 | switch (attrib.size) { | 32 | switch (attrib.size) { |
| 32 | case Maxwell::VertexAttribute::Size::Size_8: | 33 | case Maxwell::VertexAttribute::Size::Size_8: |
| 33 | case Maxwell::VertexAttribute::Size::Size_8_8: | 34 | case Maxwell::VertexAttribute::Size::Size_8_8: |
| @@ -48,8 +49,9 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { | |||
| 48 | return GL_UNSIGNED_INT_2_10_10_10_REV; | 49 | return GL_UNSIGNED_INT_2_10_10_10_REV; |
| 49 | } | 50 | } |
| 50 | break; | 51 | break; |
| 51 | case Maxwell::VertexAttribute::Type::SignedInt: | ||
| 52 | case Maxwell::VertexAttribute::Type::SignedNorm: | 52 | case Maxwell::VertexAttribute::Type::SignedNorm: |
| 53 | case Maxwell::VertexAttribute::Type::SignedScaled: | ||
| 54 | case Maxwell::VertexAttribute::Type::SignedInt: | ||
| 53 | switch (attrib.size) { | 55 | switch (attrib.size) { |
| 54 | case Maxwell::VertexAttribute::Size::Size_8: | 56 | case Maxwell::VertexAttribute::Size::Size_8: |
| 55 | case Maxwell::VertexAttribute::Size::Size_8_8: | 57 | case Maxwell::VertexAttribute::Size::Size_8_8: |
| @@ -84,36 +86,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { | |||
| 84 | return GL_FLOAT; | 86 | return GL_FLOAT; |
| 85 | } | 87 | } |
| 86 | break; | 88 | break; |
| 87 | case Maxwell::VertexAttribute::Type::UnsignedScaled: | ||
| 88 | switch (attrib.size) { | ||
| 89 | case Maxwell::VertexAttribute::Size::Size_8: | ||
| 90 | case Maxwell::VertexAttribute::Size::Size_8_8: | ||
| 91 | case Maxwell::VertexAttribute::Size::Size_8_8_8: | ||
| 92 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | ||
| 93 | return GL_UNSIGNED_BYTE; | ||
| 94 | case Maxwell::VertexAttribute::Size::Size_16: | ||
| 95 | case Maxwell::VertexAttribute::Size::Size_16_16: | ||
| 96 | case Maxwell::VertexAttribute::Size::Size_16_16_16: | ||
| 97 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | ||
| 98 | return GL_UNSIGNED_SHORT; | ||
| 99 | } | ||
| 100 | break; | ||
| 101 | case Maxwell::VertexAttribute::Type::SignedScaled: | ||
| 102 | switch (attrib.size) { | ||
| 103 | case Maxwell::VertexAttribute::Size::Size_8: | ||
| 104 | case Maxwell::VertexAttribute::Size::Size_8_8: | ||
| 105 | case Maxwell::VertexAttribute::Size::Size_8_8_8: | ||
| 106 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | ||
| 107 | return GL_BYTE; | ||
| 108 | case Maxwell::VertexAttribute::Size::Size_16: | ||
| 109 | case Maxwell::VertexAttribute::Size::Size_16_16: | ||
| 110 | case Maxwell::VertexAttribute::Size::Size_16_16_16: | ||
| 111 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | ||
| 112 | return GL_SHORT; | ||
| 113 | } | ||
| 114 | break; | ||
| 115 | } | 89 | } |
| 116 | UNIMPLEMENTED_MSG("Unimplemented vertex type={} and size={}", attrib.TypeString(), | 90 | UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", attrib.TypeString(), |
| 117 | attrib.SizeString()); | 91 | attrib.SizeString()); |
| 118 | return {}; | 92 | return {}; |
| 119 | } | 93 | } |
| @@ -217,6 +191,12 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { | |||
| 217 | } else { | 191 | } else { |
| 218 | return GL_MIRROR_CLAMP_TO_EDGE; | 192 | return GL_MIRROR_CLAMP_TO_EDGE; |
| 219 | } | 193 | } |
| 194 | case Tegra::Texture::WrapMode::MirrorOnceClampOGL: | ||
| 195 | if (GLAD_GL_EXT_texture_mirror_clamp) { | ||
| 196 | return GL_MIRROR_CLAMP_EXT; | ||
| 197 | } else { | ||
| 198 | return GL_MIRROR_CLAMP_TO_EDGE; | ||
| 199 | } | ||
| 220 | } | 200 | } |
| 221 | UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode)); | 201 | UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode)); |
| 222 | return GL_REPEAT; | 202 | return GL_REPEAT; |
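Folding the Scaled cases into the integer ones above is sound because the GL component type (GL_UNSIGNED_BYTE, GL_SHORT, ...) is identical; what separates scaled from normalized or pure-integer attributes is the format call made later, not the type enum. A sketch of that dispatch, with the boolean predicates assumed rather than taken from the diff:

    #include <glad/glad.h>

    void SetAttribFormat(GLuint index, GLint components, GLenum type,
                         bool is_integer, bool is_normalized, GLuint offset) {
        if (is_integer) {
            glVertexAttribIFormat(index, components, type, offset); // keep bits
        } else {
            // Scaled types take normalized = GL_FALSE: the integer value is
            // converted to float without rescaling.
            glVertexAttribFormat(index, components, type,
                                 is_normalized ? GL_TRUE : GL_FALSE, offset);
        }
    }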
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 6214fcbc3..c40adb6e7 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -488,6 +488,15 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 488 | 488 | ||
| 489 | // Clear screen to black | 489 | // Clear screen to black |
| 490 | LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); | 490 | LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); |
| 491 | |||
| 492 | // Enable unified vertex attributes and query vertex buffer address when the driver supports it | ||
| 493 | if (device.HasVertexBufferUnifiedMemory()) { | ||
| 494 | glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); | ||
| 495 | |||
| 496 | glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); | ||
| 497 | glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, | ||
| 498 | &vertex_buffer_address); | ||
| 499 | } | ||
| 491 | } | 500 | } |
| 492 | 501 | ||
| 493 | void RendererOpenGL::AddTelemetryFields() { | 502 | void RendererOpenGL::AddTelemetryFields() { |
| @@ -656,7 +665,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 656 | offsetof(ScreenRectVertex, tex_coord)); | 665 | offsetof(ScreenRectVertex, tex_coord)); |
| 657 | glVertexAttribBinding(PositionLocation, 0); | 666 | glVertexAttribBinding(PositionLocation, 0); |
| 658 | glVertexAttribBinding(TexCoordLocation, 0); | 667 | glVertexAttribBinding(TexCoordLocation, 0); |
| 659 | glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); | 668 | if (device.HasVertexBufferUnifiedMemory()) { |
| 669 | glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); | ||
| 670 | glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, | ||
| 671 | sizeof(vertices)); | ||
| 672 | } else { | ||
| 673 | glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); | ||
| 674 | } | ||
| 660 | 675 | ||
| 661 | glBindTextureUnit(0, screen_info.display_texture); | 676 | glBindTextureUnit(0, screen_info.display_texture); |
| 662 | glBindSampler(0, 0); | 677 | glBindSampler(0, 0); |
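The presentation quad follows the same bindless recipe in two halves: enable the unified vertex attribute client state and make the quad buffer resident once at init, then feed the cached address to glBufferAddressRangeNV on every draw. A hedged sketch of the init half, assuming GL_NV_vertex_buffer_unified_memory is present:

    #include <glad/glad.h>

    GLuint64EXT MakeQuadResident(GLuint vertex_buffer) {
        glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
        glMakeNamedBufferResidentNV(vertex_buffer, GL_READ_ONLY);
        GLuint64EXT address = 0;
        glGetNamedBufferParameterui64vNV(vertex_buffer, GL_BUFFER_GPU_ADDRESS_NV,
                                         &address);
        return address; // pass to glBufferAddressRangeNV at draw time
    }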
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 61bf507f4..8b18d32e6 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -107,6 +107,9 @@ private: | |||
| 107 | OGLPipeline pipeline; | 107 | OGLPipeline pipeline; |
| 108 | OGLFramebuffer screenshot_framebuffer; | 108 | OGLFramebuffer screenshot_framebuffer; |
| 109 | 109 | ||
| 110 | // GPU address of the vertex buffer | ||
| 111 | GLuint64EXT vertex_buffer_address = 0; | ||
| 112 | |||
| 110 | /// Display information for Switch screen | 113 | /// Display information for Switch screen |
| 111 | ScreenInfo screen_info; | 114 | ScreenInfo screen_info; |
| 112 | 115 | ||
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 1f2b6734b..d7f1ae89f 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -294,6 +294,28 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, | |||
| 294 | 294 | ||
| 295 | VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { | 295 | VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { |
| 296 | switch (type) { | 296 | switch (type) { |
| 297 | case Maxwell::VertexAttribute::Type::UnsignedNorm: | ||
| 298 | switch (size) { | ||
| 299 | case Maxwell::VertexAttribute::Size::Size_8: | ||
| 300 | return VK_FORMAT_R8_UNORM; | ||
| 301 | case Maxwell::VertexAttribute::Size::Size_8_8: | ||
| 302 | return VK_FORMAT_R8G8_UNORM; | ||
| 303 | case Maxwell::VertexAttribute::Size::Size_8_8_8: | ||
| 304 | return VK_FORMAT_R8G8B8_UNORM; | ||
| 305 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | ||
| 306 | return VK_FORMAT_R8G8B8A8_UNORM; | ||
| 307 | case Maxwell::VertexAttribute::Size::Size_16: | ||
| 308 | return VK_FORMAT_R16_UNORM; | ||
| 309 | case Maxwell::VertexAttribute::Size::Size_16_16: | ||
| 310 | return VK_FORMAT_R16G16_UNORM; | ||
| 311 | case Maxwell::VertexAttribute::Size::Size_16_16_16: | ||
| 312 | return VK_FORMAT_R16G16B16_UNORM; | ||
| 313 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | ||
| 314 | return VK_FORMAT_R16G16B16A16_UNORM; | ||
| 315 | case Maxwell::VertexAttribute::Size::Size_10_10_10_2: | ||
| 316 | return VK_FORMAT_A2B10G10R10_UNORM_PACK32; | ||
| 317 | } | ||
| 318 | break; | ||
| 297 | case Maxwell::VertexAttribute::Type::SignedNorm: | 319 | case Maxwell::VertexAttribute::Type::SignedNorm: |
| 298 | switch (size) { | 320 | switch (size) { |
| 299 | case Maxwell::VertexAttribute::Size::Size_8: | 321 | case Maxwell::VertexAttribute::Size::Size_8: |
| @@ -314,62 +336,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib | |||
| 314 | return VK_FORMAT_R16G16B16A16_SNORM; | 336 | return VK_FORMAT_R16G16B16A16_SNORM; |
| 315 | case Maxwell::VertexAttribute::Size::Size_10_10_10_2: | 337 | case Maxwell::VertexAttribute::Size::Size_10_10_10_2: |
| 316 | return VK_FORMAT_A2B10G10R10_SNORM_PACK32; | 338 | return VK_FORMAT_A2B10G10R10_SNORM_PACK32; |
| 317 | default: | ||
| 318 | break; | ||
| 319 | } | 339 | } |
| 320 | break; | 340 | break; |
| 321 | case Maxwell::VertexAttribute::Type::UnsignedNorm: | 341 | case Maxwell::VertexAttribute::Type::UnsignedScaled: |
| 322 | switch (size) { | 342 | switch (size) { |
| 323 | case Maxwell::VertexAttribute::Size::Size_8: | 343 | case Maxwell::VertexAttribute::Size::Size_8: |
| 324 | return VK_FORMAT_R8_UNORM; | 344 | return VK_FORMAT_R8_USCALED; |
| 325 | case Maxwell::VertexAttribute::Size::Size_8_8: | 345 | case Maxwell::VertexAttribute::Size::Size_8_8: |
| 326 | return VK_FORMAT_R8G8_UNORM; | 346 | return VK_FORMAT_R8G8_USCALED; |
| 327 | case Maxwell::VertexAttribute::Size::Size_8_8_8: | 347 | case Maxwell::VertexAttribute::Size::Size_8_8_8: |
| 328 | return VK_FORMAT_R8G8B8_UNORM; | 348 | return VK_FORMAT_R8G8B8_USCALED; |
| 329 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | 349 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: |
| 330 | return VK_FORMAT_R8G8B8A8_UNORM; | 350 | return VK_FORMAT_R8G8B8A8_USCALED; |
| 331 | case Maxwell::VertexAttribute::Size::Size_16: | 351 | case Maxwell::VertexAttribute::Size::Size_16: |
| 332 | return VK_FORMAT_R16_UNORM; | 352 | return VK_FORMAT_R16_USCALED; |
| 333 | case Maxwell::VertexAttribute::Size::Size_16_16: | 353 | case Maxwell::VertexAttribute::Size::Size_16_16: |
| 334 | return VK_FORMAT_R16G16_UNORM; | 354 | return VK_FORMAT_R16G16_USCALED; |
| 335 | case Maxwell::VertexAttribute::Size::Size_16_16_16: | 355 | case Maxwell::VertexAttribute::Size::Size_16_16_16: |
| 336 | return VK_FORMAT_R16G16B16_UNORM; | 356 | return VK_FORMAT_R16G16B16_USCALED; |
| 337 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | 357 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: |
| 338 | return VK_FORMAT_R16G16B16A16_UNORM; | 358 | return VK_FORMAT_R16G16B16A16_USCALED; |
| 339 | case Maxwell::VertexAttribute::Size::Size_10_10_10_2: | 359 | case Maxwell::VertexAttribute::Size::Size_10_10_10_2: |
| 340 | return VK_FORMAT_A2B10G10R10_UNORM_PACK32; | 360 | return VK_FORMAT_A2B10G10R10_USCALED_PACK32; |
| 341 | default: | ||
| 342 | break; | ||
| 343 | } | 361 | } |
| 344 | break; | 362 | break; |
| 345 | case Maxwell::VertexAttribute::Type::SignedInt: | 363 | case Maxwell::VertexAttribute::Type::SignedScaled: |
| 346 | switch (size) { | 364 | switch (size) { |
| 347 | case Maxwell::VertexAttribute::Size::Size_8: | 365 | case Maxwell::VertexAttribute::Size::Size_8: |
| 348 | return VK_FORMAT_R8_SINT; | 366 | return VK_FORMAT_R8_SSCALED; |
| 349 | case Maxwell::VertexAttribute::Size::Size_8_8: | 367 | case Maxwell::VertexAttribute::Size::Size_8_8: |
| 350 | return VK_FORMAT_R8G8_SINT; | 368 | return VK_FORMAT_R8G8_SSCALED; |
| 351 | case Maxwell::VertexAttribute::Size::Size_8_8_8: | 369 | case Maxwell::VertexAttribute::Size::Size_8_8_8: |
| 352 | return VK_FORMAT_R8G8B8_SINT; | 370 | return VK_FORMAT_R8G8B8_SSCALED; |
| 353 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | 371 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: |
| 354 | return VK_FORMAT_R8G8B8A8_SINT; | 372 | return VK_FORMAT_R8G8B8A8_SSCALED; |
| 355 | case Maxwell::VertexAttribute::Size::Size_16: | 373 | case Maxwell::VertexAttribute::Size::Size_16: |
| 356 | return VK_FORMAT_R16_SINT; | 374 | return VK_FORMAT_R16_SSCALED; |
| 357 | case Maxwell::VertexAttribute::Size::Size_16_16: | 375 | case Maxwell::VertexAttribute::Size::Size_16_16: |
| 358 | return VK_FORMAT_R16G16_SINT; | 376 | return VK_FORMAT_R16G16_SSCALED; |
| 359 | case Maxwell::VertexAttribute::Size::Size_16_16_16: | 377 | case Maxwell::VertexAttribute::Size::Size_16_16_16: |
| 360 | return VK_FORMAT_R16G16B16_SINT; | 378 | return VK_FORMAT_R16G16B16_SSCALED; |
| 361 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | 379 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: |
| 362 | return VK_FORMAT_R16G16B16A16_SINT; | 380 | return VK_FORMAT_R16G16B16A16_SSCALED; |
| 363 | case Maxwell::VertexAttribute::Size::Size_32: | 381 | case Maxwell::VertexAttribute::Size::Size_10_10_10_2: |
| 364 | return VK_FORMAT_R32_SINT; | 382 | return VK_FORMAT_A2B10G10R10_SSCALED_PACK32; |
| 365 | case Maxwell::VertexAttribute::Size::Size_32_32: | ||
| 366 | return VK_FORMAT_R32G32_SINT; | ||
| 367 | case Maxwell::VertexAttribute::Size::Size_32_32_32: | ||
| 368 | return VK_FORMAT_R32G32B32_SINT; | ||
| 369 | case Maxwell::VertexAttribute::Size::Size_32_32_32_32: | ||
| 370 | return VK_FORMAT_R32G32B32A32_SINT; | ||
| 371 | default: | ||
| 372 | break; | ||
| 373 | } | 383 | } |
| 374 | break; | 384 | break; |
| 375 | case Maxwell::VertexAttribute::Type::UnsignedInt: | 385 | case Maxwell::VertexAttribute::Type::UnsignedInt: |
| @@ -398,56 +408,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib | |||
| 398 | return VK_FORMAT_R32G32B32_UINT; | 408 | return VK_FORMAT_R32G32B32_UINT; |
| 399 | case Maxwell::VertexAttribute::Size::Size_32_32_32_32: | 409 | case Maxwell::VertexAttribute::Size::Size_32_32_32_32: |
| 400 | return VK_FORMAT_R32G32B32A32_UINT; | 410 | return VK_FORMAT_R32G32B32A32_UINT; |
| 401 | default: | 411 | case Maxwell::VertexAttribute::Size::Size_10_10_10_2: |
| 402 | break; | 412 | return VK_FORMAT_A2B10G10R10_UINT_PACK32; |
| 403 | } | 413 | } |
| 404 | break; | 414 | break; |
| 405 | case Maxwell::VertexAttribute::Type::UnsignedScaled: | 415 | case Maxwell::VertexAttribute::Type::SignedInt: |
| 406 | switch (size) { | 416 | switch (size) { |
| 407 | case Maxwell::VertexAttribute::Size::Size_8: | 417 | case Maxwell::VertexAttribute::Size::Size_8: |
| 408 | return VK_FORMAT_R8_USCALED; | 418 | return VK_FORMAT_R8_SINT; |
| 409 | case Maxwell::VertexAttribute::Size::Size_8_8: | 419 | case Maxwell::VertexAttribute::Size::Size_8_8: |
| 410 | return VK_FORMAT_R8G8_USCALED; | 420 | return VK_FORMAT_R8G8_SINT; |
| 411 | case Maxwell::VertexAttribute::Size::Size_8_8_8: | 421 | case Maxwell::VertexAttribute::Size::Size_8_8_8: |
| 412 | return VK_FORMAT_R8G8B8_USCALED; | 422 | return VK_FORMAT_R8G8B8_SINT; |
| 413 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | 423 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: |
| 414 | return VK_FORMAT_R8G8B8A8_USCALED; | 424 | return VK_FORMAT_R8G8B8A8_SINT; |
| 415 | case Maxwell::VertexAttribute::Size::Size_16: | 425 | case Maxwell::VertexAttribute::Size::Size_16: |
| 416 | return VK_FORMAT_R16_USCALED; | 426 | return VK_FORMAT_R16_SINT; |
| 417 | case Maxwell::VertexAttribute::Size::Size_16_16: | 427 | case Maxwell::VertexAttribute::Size::Size_16_16: |
| 418 | return VK_FORMAT_R16G16_USCALED; | 428 | return VK_FORMAT_R16G16_SINT; |
| 419 | case Maxwell::VertexAttribute::Size::Size_16_16_16: | 429 | case Maxwell::VertexAttribute::Size::Size_16_16_16: |
| 420 | return VK_FORMAT_R16G16B16_USCALED; | 430 | return VK_FORMAT_R16G16B16_SINT; |
| 421 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | 431 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: |
| 422 | return VK_FORMAT_R16G16B16A16_USCALED; | 432 | return VK_FORMAT_R16G16B16A16_SINT; |
| 423 | default: | 433 | case Maxwell::VertexAttribute::Size::Size_32: |
| 424 | break; | 434 | return VK_FORMAT_R32_SINT; |
| 435 | case Maxwell::VertexAttribute::Size::Size_32_32: | ||
| 436 | return VK_FORMAT_R32G32_SINT; | ||
| 437 | case Maxwell::VertexAttribute::Size::Size_32_32_32: | ||
| 438 | return VK_FORMAT_R32G32B32_SINT; | ||
| 439 | case Maxwell::VertexAttribute::Size::Size_32_32_32_32: | ||
| 440 | return VK_FORMAT_R32G32B32A32_SINT; | ||
| 441 | case Maxwell::VertexAttribute::Size::Size_10_10_10_2: | ||
| 442 | return VK_FORMAT_A2B10G10R10_SINT_PACK32; | ||
| 425 | } | 443 | } |
| 426 | break; | 444 | break; |
| 427 | case Maxwell::VertexAttribute::Type::SignedScaled: | 445 | case Maxwell::VertexAttribute::Type::Float: |
| 428 | switch (size) { | 446 | switch (size) { |
| 429 | case Maxwell::VertexAttribute::Size::Size_8: | ||
| 430 | return VK_FORMAT_R8_SSCALED; | ||
| 431 | case Maxwell::VertexAttribute::Size::Size_8_8: | ||
| 432 | return VK_FORMAT_R8G8_SSCALED; | ||
| 433 | case Maxwell::VertexAttribute::Size::Size_8_8_8: | ||
| 434 | return VK_FORMAT_R8G8B8_SSCALED; | ||
| 435 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | ||
| 436 | return VK_FORMAT_R8G8B8A8_SSCALED; | ||
| 437 | case Maxwell::VertexAttribute::Size::Size_16: | 447 | case Maxwell::VertexAttribute::Size::Size_16: |
| 438 | return VK_FORMAT_R16_SSCALED; | 448 | return VK_FORMAT_R16_SFLOAT; |
| 439 | case Maxwell::VertexAttribute::Size::Size_16_16: | 449 | case Maxwell::VertexAttribute::Size::Size_16_16: |
| 440 | return VK_FORMAT_R16G16_SSCALED; | 450 | return VK_FORMAT_R16G16_SFLOAT; |
| 441 | case Maxwell::VertexAttribute::Size::Size_16_16_16: | 451 | case Maxwell::VertexAttribute::Size::Size_16_16_16: |
| 442 | return VK_FORMAT_R16G16B16_SSCALED; | 452 | return VK_FORMAT_R16G16B16_SFLOAT; |
| 443 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | 453 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: |
| 444 | return VK_FORMAT_R16G16B16A16_SSCALED; | 454 | return VK_FORMAT_R16G16B16A16_SFLOAT; |
| 445 | default: | ||
| 446 | break; | ||
| 447 | } | ||
| 448 | break; | ||
| 449 | case Maxwell::VertexAttribute::Type::Float: | ||
| 450 | switch (size) { | ||
| 451 | case Maxwell::VertexAttribute::Size::Size_32: | 455 | case Maxwell::VertexAttribute::Size::Size_32: |
| 452 | return VK_FORMAT_R32_SFLOAT; | 456 | return VK_FORMAT_R32_SFLOAT; |
| 453 | case Maxwell::VertexAttribute::Size::Size_32_32: | 457 | case Maxwell::VertexAttribute::Size::Size_32_32: |
| @@ -456,16 +460,6 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib | |||
| 456 | return VK_FORMAT_R32G32B32_SFLOAT; | 460 | return VK_FORMAT_R32G32B32_SFLOAT; |
| 457 | case Maxwell::VertexAttribute::Size::Size_32_32_32_32: | 461 | case Maxwell::VertexAttribute::Size::Size_32_32_32_32: |
| 458 | return VK_FORMAT_R32G32B32A32_SFLOAT; | 462 | return VK_FORMAT_R32G32B32A32_SFLOAT; |
| 459 | case Maxwell::VertexAttribute::Size::Size_16: | ||
| 460 | return VK_FORMAT_R16_SFLOAT; | ||
| 461 | case Maxwell::VertexAttribute::Size::Size_16_16: | ||
| 462 | return VK_FORMAT_R16G16_SFLOAT; | ||
| 463 | case Maxwell::VertexAttribute::Size::Size_16_16_16: | ||
| 464 | return VK_FORMAT_R16G16B16_SFLOAT; | ||
| 465 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | ||
| 466 | return VK_FORMAT_R16G16B16A16_SFLOAT; | ||
| 467 | default: | ||
| 468 | break; | ||
| 469 | } | 463 | } |
| 470 | break; | 464 | break; |
| 471 | } | 465 | } |
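Besides reordering the cases, the hunks above drop the `default: break;` labels from the inner switches, so compilers that warn on unhandled enumerators (e.g. -Wswitch) can flag any Maxwell::VertexAttribute::Size that lacks a mapping; execution instead falls out of the switch, presumably reaching the function's trailing error handling (not shown in this hunk). A reduced sketch of the pattern with stand-in enumerators:

    #include <vulkan/vulkan.h>

    enum class Size { Size_8, Size_8_8 };

    VkFormat UnormFormat(Size size) {
        switch (size) { // no default label: -Wswitch catches new unmapped sizes
        case Size::Size_8:
            return VK_FORMAT_R8_UNORM;
        case Size::Size_8_8:
            return VK_FORMAT_R8G8_UNORM;
        }
        // Reaching here means an unhandled enumerator; VK_FORMAT_UNDEFINED
        // serves as the "no direct mapping" sentinel for the caller.
        return VK_FORMAT_UNDEFINED;
    }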
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index cd9673d1f..2d9b18ed9 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp | |||
| @@ -155,11 +155,31 @@ vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatc | |||
| 155 | } | 155 | } |
| 156 | } | 156 | } |
| 157 | 157 | ||
| 158 | static constexpr std::array layers_data{"VK_LAYER_LUNARG_standard_validation"}; | 158 | std::vector<const char*> layers; |
| 159 | vk::Span<const char*> layers = layers_data; | 159 | layers.reserve(1); |
| 160 | if (!enable_layers) { | 160 | if (enable_layers) { |
| 161 | layers = {}; | 161 | layers.push_back("VK_LAYER_KHRONOS_validation"); |
| 162 | } | ||
| 163 | |||
| 164 | const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld); | ||
| 165 | if (!layer_properties) { | ||
| 166 | LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers"); | ||
| 167 | layers.clear(); | ||
| 168 | } | ||
| 169 | |||
| 170 | for (auto layer_it = layers.begin(); layer_it != layers.end();) { | ||
| 171 | const char* const layer = *layer_it; | ||
| 172 | const auto it = std::find_if( | ||
| 173 | layer_properties->begin(), layer_properties->end(), | ||
| 174 | [layer](const VkLayerProperties& prop) { return !std::strcmp(layer, prop.layerName); }); | ||
| 175 | if (it == layer_properties->end()) { | ||
| 176 | LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer); | ||
| 177 | layer_it = layers.erase(layer_it); | ||
| 178 | } else { | ||
| 179 | ++layer_it; | ||
| 180 | } | ||
| 162 | } | 181 | } |
| 182 | |||
| 163 | vk::Instance instance = vk::Instance::Create(layers, extensions, dld); | 183 | vk::Instance instance = vk::Instance::Create(layers, extensions, dld); |
| 164 | if (!instance) { | 184 | if (!instance) { |
| 165 | LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); | 185 | LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); |
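The hunk above replaces the fixed LunarG layer array with a runtime check: requested layers are dropped when the loader does not report them, so instance creation no longer fails on machines without the validation layers installed. The same pattern restated against the raw Vulkan API as a self-contained sketch (C++20 for std::erase_if; the commit goes through yuzu's dispatch wrapper instead):

    #include <cstring>
    #include <vector>
    #include <vulkan/vulkan.h>

    std::vector<const char*> FilterAvailableLayers(std::vector<const char*> wanted) {
        uint32_t count = 0;
        if (vkEnumerateInstanceLayerProperties(&count, nullptr) != VK_SUCCESS) {
            return {}; // Query failed: run without layers, as the code above does.
        }
        std::vector<VkLayerProperties> available(count);
        if (vkEnumerateInstanceLayerProperties(&count, available.data()) != VK_SUCCESS) {
            return {};
        }
        std::erase_if(wanted, [&](const char* name) {
            for (const VkLayerProperties& prop : available) {
                if (std::strcmp(name, prop.layerName) == 0) {
                    return false; // Keep: the loader reports this layer.
                }
            }
            return true; // Drop instead of failing vkCreateInstance later.
        });
        return wanted;
    }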
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 1fde38328..2be38d419 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -37,9 +37,9 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch | |||
| 37 | 37 | ||
| 38 | } // Anonymous namespace | 38 | } // Anonymous namespace |
| 39 | 39 | ||
| 40 | Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, | 40 | Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_, |
| 41 | std::size_t size) | 41 | VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size) |
| 42 | : VideoCommon::BufferBlock{cpu_addr, size} { | 42 | : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} { |
| 43 | VkBufferCreateInfo ci; | 43 | VkBufferCreateInfo ci; |
| 44 | ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; | 44 | ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; |
| 45 | ci.pNext = nullptr; | 45 | ci.pNext = nullptr; |
| @@ -56,40 +56,15 @@ Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cp | |||
| 56 | 56 | ||
| 57 | Buffer::~Buffer() = default; | 57 | Buffer::~Buffer() = default; |
| 58 | 58 | ||
| 59 | VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | 59 | void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) { |
| 60 | const VKDevice& device, VKMemoryManager& memory_manager, | ||
| 61 | VKScheduler& scheduler, VKStagingBufferPool& staging_pool) | ||
| 62 | : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system, | ||
| 63 | CreateStreamBuffer(device, | ||
| 64 | scheduler)}, | ||
| 65 | device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ | ||
| 66 | staging_pool} {} | ||
| 67 | |||
| 68 | VKBufferCache::~VKBufferCache() = default; | ||
| 69 | |||
| 70 | std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | ||
| 71 | return std::make_shared<Buffer>(device, memory_manager, cpu_addr, size); | ||
| 72 | } | ||
| 73 | |||
| 74 | VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) { | ||
| 75 | size = std::max(size, std::size_t(4)); | ||
| 76 | const auto& empty = staging_pool.GetUnusedBuffer(size, false); | ||
| 77 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 78 | scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { | ||
| 79 | cmdbuf.FillBuffer(buffer, 0, size, 0); | ||
| 80 | }); | ||
| 81 | return *empty.handle; | ||
| 82 | } | ||
| 83 | |||
| 84 | void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||
| 85 | const u8* data) { | ||
| 86 | const auto& staging = staging_pool.GetUnusedBuffer(size, true); | 60 | const auto& staging = staging_pool.GetUnusedBuffer(size, true); |
| 87 | std::memcpy(staging.commit->Map(size), data, size); | 61 | std::memcpy(staging.commit->Map(size), data, size); |
| 88 | 62 | ||
| 89 | scheduler.RequestOutsideRenderPassOperationContext(); | 63 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 90 | scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset, | 64 | |
| 91 | size](vk::CommandBuffer cmdbuf) { | 65 | const VkBuffer handle = Handle(); |
| 92 | cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); | 66 | scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { |
| 67 | cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size}); | ||
| 93 | 68 | ||
| 94 | VkBufferMemoryBarrier barrier; | 69 | VkBufferMemoryBarrier barrier; |
| 95 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | 70 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; |
| @@ -98,7 +73,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st | |||
| 98 | barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS; | 73 | barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS; |
| 99 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | 74 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; |
| 100 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | 75 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; |
| 101 | barrier.buffer = buffer; | 76 | barrier.buffer = handle; |
| 102 | barrier.offset = offset; | 77 | barrier.offset = offset; |
| 103 | barrier.size = size; | 78 | barrier.size = size; |
| 104 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, | 79 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, |
| @@ -106,12 +81,12 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st | |||
| 106 | }); | 81 | }); |
| 107 | } | 82 | } |
| 108 | 83 | ||
| 109 | void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | 84 | void Buffer::Download(std::size_t offset, std::size_t size, u8* data) { |
| 110 | u8* data) { | ||
| 111 | const auto& staging = staging_pool.GetUnusedBuffer(size, true); | 85 | const auto& staging = staging_pool.GetUnusedBuffer(size, true); |
| 112 | scheduler.RequestOutsideRenderPassOperationContext(); | 86 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 113 | scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset, | 87 | |
| 114 | size](vk::CommandBuffer cmdbuf) { | 88 | const VkBuffer handle = Handle(); |
| 89 | scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) { | ||
| 115 | VkBufferMemoryBarrier barrier; | 90 | VkBufferMemoryBarrier barrier; |
| 116 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | 91 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; |
| 117 | barrier.pNext = nullptr; | 92 | barrier.pNext = nullptr; |
| @@ -119,7 +94,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, | |||
| 119 | barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; | 94 | barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; |
| 120 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | 95 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; |
| 121 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | 96 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; |
| 122 | barrier.buffer = buffer; | 97 | barrier.buffer = handle; |
| 123 | barrier.offset = offset; | 98 | barrier.offset = offset; |
| 124 | barrier.size = size; | 99 | barrier.size = size; |
| 125 | 100 | ||
| @@ -127,17 +102,19 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, | |||
| 127 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | | 102 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | |
| 128 | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | 103 | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| 129 | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); | 104 | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); |
| 130 | cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size}); | 105 | cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size}); |
| 131 | }); | 106 | }); |
| 132 | scheduler.Finish(); | 107 | scheduler.Finish(); |
| 133 | 108 | ||
| 134 | std::memcpy(data, staging.commit->Map(size), size); | 109 | std::memcpy(data, staging.commit->Map(size), size); |
| 135 | } | 110 | } |
| 136 | 111 | ||
| 137 | void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, | 112 | void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, |
| 138 | std::size_t dst_offset, std::size_t size) { | 113 | std::size_t size) { |
| 139 | scheduler.RequestOutsideRenderPassOperationContext(); | 114 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 140 | scheduler.Record([src_buffer = src.Handle(), dst_buffer = dst.Handle(), src_offset, dst_offset, | 115 | |
| 116 | const VkBuffer dst_buffer = Handle(); | ||
| 117 | scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset, | ||
| 141 | size](vk::CommandBuffer cmdbuf) { | 118 | size](vk::CommandBuffer cmdbuf) { |
| 142 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); | 119 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); |
| 143 | 120 | ||
| @@ -165,4 +142,30 @@ void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t | |||
| 165 | }); | 142 | }); |
| 166 | } | 143 | } |
| 167 | 144 | ||
| 145 | VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | ||
| 146 | const VKDevice& device, VKMemoryManager& memory_manager, | ||
| 147 | VKScheduler& scheduler, VKStagingBufferPool& staging_pool) | ||
| 148 | : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system, | ||
| 149 | CreateStreamBuffer(device, | ||
| 150 | scheduler)}, | ||
| 151 | device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ | ||
| 152 | staging_pool} {} | ||
| 153 | |||
| 154 | VKBufferCache::~VKBufferCache() = default; | ||
| 155 | |||
| 156 | std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | ||
| 157 | return std::make_shared<Buffer>(device, memory_manager, scheduler, staging_pool, cpu_addr, | ||
| 158 | size); | ||
| 159 | } | ||
| 160 | |||
| 161 | VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { | ||
| 162 | size = std::max(size, std::size_t(4)); | ||
| 163 | const auto& empty = staging_pool.GetUnusedBuffer(size, false); | ||
| 164 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 165 | scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { | ||
| 166 | cmdbuf.FillBuffer(buffer, 0, size, 0); | ||
| 167 | }); | ||
| 168 | return {*empty.handle, 0, 0}; | ||
| 169 | } | ||
| 170 | |||
| 168 | } // namespace Vulkan | 171 | } // namespace Vulkan |
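The uploads and downloads above remain plain staging-buffer transfers guarded by buffer memory barriers; only their ownership moved from the cache into Buffer. What Upload records, restated against the raw API: the handles and destination stage/access masks are parameters here, whereas the commit takes them from the staging pool and the UPLOAD_* constants:

    #include <vulkan/vulkan.h>

    void RecordStagedUpload(VkCommandBuffer cmdbuf, VkBuffer staging, VkBuffer dest,
                            VkDeviceSize offset, VkDeviceSize size,
                            VkPipelineStageFlags dst_stage, VkAccessFlags dst_access) {
        // Copy host-written staging data into the device-local buffer.
        const VkBufferCopy copy{0, offset, size};
        vkCmdCopyBuffer(cmdbuf, staging, dest, 1, &copy);

        // Make the transfer visible to the consuming pipeline stages.
        VkBufferMemoryBarrier barrier{};
        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.dstAccessMask = dst_access;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.buffer = dest;
        barrier.offset = offset;
        barrier.size = size;
        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage, 0,
                             0, nullptr, 1, &barrier, 0, nullptr);
    }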
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 9ebbef835..991ee451c 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -25,15 +25,29 @@ class VKScheduler; | |||
| 25 | 25 | ||
| 26 | class Buffer final : public VideoCommon::BufferBlock { | 26 | class Buffer final : public VideoCommon::BufferBlock { |
| 27 | public: | 27 | public: |
| 28 | explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, | 28 | explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, |
| 29 | std::size_t size); | 29 | VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size); |
| 30 | ~Buffer(); | 30 | ~Buffer(); |
| 31 | 31 | ||
| 32 | void Upload(std::size_t offset, std::size_t size, const u8* data); | ||
| 33 | |||
| 34 | void Download(std::size_t offset, std::size_t size, u8* data); | ||
| 35 | |||
| 36 | void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | ||
| 37 | std::size_t size); | ||
| 38 | |||
| 32 | VkBuffer Handle() const { | 39 | VkBuffer Handle() const { |
| 33 | return *buffer.handle; | 40 | return *buffer.handle; |
| 34 | } | 41 | } |
| 35 | 42 | ||
| 43 | u64 Address() const { | ||
| 44 | return 0; | ||
| 45 | } | ||
| 46 | |||
| 36 | private: | 47 | private: |
| 48 | VKScheduler& scheduler; | ||
| 49 | VKStagingBufferPool& staging_pool; | ||
| 50 | |||
| 37 | VKBuffer buffer; | 51 | VKBuffer buffer; |
| 38 | }; | 52 | }; |
| 39 | 53 | ||
| @@ -44,20 +58,11 @@ public: | |||
| 44 | VKScheduler& scheduler, VKStagingBufferPool& staging_pool); | 58 | VKScheduler& scheduler, VKStagingBufferPool& staging_pool); |
| 45 | ~VKBufferCache(); | 59 | ~VKBufferCache(); |
| 46 | 60 | ||
| 47 | VkBuffer GetEmptyBuffer(std::size_t size) override; | 61 | BufferInfo GetEmptyBuffer(std::size_t size) override; |
| 48 | 62 | ||
| 49 | protected: | 63 | protected: |
| 50 | std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; | 64 | std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; |
| 51 | 65 | ||
| 52 | void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||
| 53 | const u8* data) override; | ||
| 54 | |||
| 55 | void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | ||
| 56 | u8* data) override; | ||
| 57 | |||
| 58 | void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, | ||
| 59 | std::size_t dst_offset, std::size_t size) override; | ||
| 60 | |||
| 61 | private: | 66 | private: |
| 62 | const VKDevice& device; | 67 | const VKDevice& device; |
| 63 | VKMemoryManager& memory_manager; | 68 | VKMemoryManager& memory_manager; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index a77fa35c3..a8d94eac3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -143,6 +143,49 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry | |||
| 143 | } | 143 | } |
| 144 | } | 144 | } |
| 145 | 145 | ||
| 146 | /// @brief Determine if a color attachment to be updated has to preserve its contents | ||
| 147 | /// @param is_clear True when a clear is being executed | ||
| 148 | /// @param regs 3D registers | ||
| 149 | /// @return True when the contents have to be preserved | ||
| 150 | bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) { | ||
| 151 | if (!is_clear) { | ||
| 152 | return true; | ||
| 153 | } | ||
| 154 | // First we have to make sure all clear masks are enabled. | ||
| 155 | if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B || | ||
| 156 | !regs.clear_buffers.A) { | ||
| 157 | return true; | ||
| 158 | } | ||
| 159 | // If scissors are disabled, the whole screen is cleared | ||
| 160 | if (!regs.clear_flags.scissor) { | ||
| 161 | return false; | ||
| 162 | } | ||
| 163 | // Then we have to confirm scissor testing clears the whole image | ||
| 164 | const std::size_t index = regs.clear_buffers.RT; | ||
| 165 | const auto& scissor = regs.scissor_test[0]; | ||
| 166 | return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width || | ||
| 167 | scissor.max_y < regs.rt[index].height; | ||
| 168 | } | ||
| 169 | |||
| 170 | /// @brief Determine if a depth attachment to be updated has to preserve its contents | ||
| 171 | /// @param is_clear True when a clear is being executed | ||
| 172 | /// @param regs 3D registers | ||
| 173 | /// @return True when the contents have to be preserved | ||
| 174 | bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) { | ||
| 175 | // If we are not clearing, the contents have to be preserved | ||
| 176 | if (!is_clear) { | ||
| 177 | return true; | ||
| 178 | } | ||
| 179 | // For depth-stencil clears we only have to confirm the scissor test covers the whole image | ||
| 180 | if (!regs.clear_flags.scissor) { | ||
| 181 | return false; | ||
| 182 | } | ||
| 183 | // Make sure the clear covers the whole image | ||
| 184 | const auto& scissor = regs.scissor_test[0]; | ||
| 185 | return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width || | ||
| 186 | scissor.max_y < regs.zeta_height; | ||
| 187 | } | ||
| 188 | |||
| 146 | } // Anonymous namespace | 189 | } // Anonymous namespace |
| 147 | 190 | ||
| 148 | class BufferBindings final { | 191 | class BufferBindings final { |
| @@ -344,7 +387,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 344 | 387 | ||
| 345 | buffer_cache.Unmap(); | 388 | buffer_cache.Unmap(); |
| 346 | 389 | ||
| 347 | const Texceptions texceptions = UpdateAttachments(); | 390 | const Texceptions texceptions = UpdateAttachments(false); |
| 348 | SetupImageTransitions(texceptions, color_attachments, zeta_attachment); | 391 | SetupImageTransitions(texceptions, color_attachments, zeta_attachment); |
| 349 | 392 | ||
| 350 | key.renderpass_params = GetRenderPassParams(texceptions); | 393 | key.renderpass_params = GetRenderPassParams(texceptions); |
| @@ -400,7 +443,7 @@ void RasterizerVulkan::Clear() { | |||
| 400 | return; | 443 | return; |
| 401 | } | 444 | } |
| 402 | 445 | ||
| 403 | [[maybe_unused]] const auto texceptions = UpdateAttachments(); | 446 | [[maybe_unused]] const auto texceptions = UpdateAttachments(true); |
| 404 | DEBUG_ASSERT(texceptions.none()); | 447 | DEBUG_ASSERT(texceptions.none()); |
| 405 | SetupImageTransitions(0, color_attachments, zeta_attachment); | 448 | SetupImageTransitions(0, color_attachments, zeta_attachment); |
| 406 | 449 | ||
| @@ -677,9 +720,12 @@ void RasterizerVulkan::FlushWork() { | |||
| 677 | draw_counter = 0; | 720 | draw_counter = 0; |
| 678 | } | 721 | } |
| 679 | 722 | ||
| 680 | RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { | 723 | RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) { |
| 681 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); | 724 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); |
| 682 | auto& dirty = system.GPU().Maxwell3D().dirty.flags; | 725 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 726 | auto& dirty = maxwell3d.dirty.flags; | ||
| 727 | auto& regs = maxwell3d.regs; | ||
| 728 | |||
| 683 | const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; | 729 | const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; |
| 684 | dirty[VideoCommon::Dirty::RenderTargets] = false; | 730 | dirty[VideoCommon::Dirty::RenderTargets] = false; |
| 685 | 731 | ||
| @@ -688,7 +734,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { | |||
| 688 | Texceptions texceptions; | 734 | Texceptions texceptions; |
| 689 | for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { | 735 | for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { |
| 690 | if (update_rendertargets) { | 736 | if (update_rendertargets) { |
| 691 | color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); | 737 | const bool preserve_contents = HasToPreserveColorContents(is_clear, regs); |
| 738 | color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents); | ||
| 692 | } | 739 | } |
| 693 | if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { | 740 | if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { |
| 694 | texceptions[rt] = true; | 741 | texceptions[rt] = true; |
| @@ -696,7 +743,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { | |||
| 696 | } | 743 | } |
| 697 | 744 | ||
| 698 | if (update_rendertargets) { | 745 | if (update_rendertargets) { |
| 699 | zeta_attachment = texture_cache.GetDepthBufferSurface(true); | 746 | const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs); |
| 747 | zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents); | ||
| 700 | } | 748 | } |
| 701 | if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { | 749 | if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { |
| 702 | texceptions[ZETA_TEXCEPTION_INDEX] = true; | 750 | texceptions[ZETA_TEXCEPTION_INDEX] = true; |
| @@ -870,10 +918,10 @@ void RasterizerVulkan::BeginTransformFeedback() { | |||
| 870 | UNIMPLEMENTED_IF(binding.buffer_offset != 0); | 918 | UNIMPLEMENTED_IF(binding.buffer_offset != 0); |
| 871 | 919 | ||
| 872 | const GPUVAddr gpu_addr = binding.Address(); | 920 | const GPUVAddr gpu_addr = binding.Address(); |
| 873 | const auto size = static_cast<VkDeviceSize>(binding.buffer_size); | 921 | const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size); |
| 874 | const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | 922 | const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); |
| 875 | 923 | ||
| 876 | scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) { | 924 | scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) { |
| 877 | cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); | 925 | cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); |
| 878 | cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); | 926 | cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); |
| 879 | }); | 927 | }); |
| @@ -925,8 +973,8 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex | |||
| 925 | buffer_bindings.AddVertexBinding(DefaultBuffer(), 0); | 973 | buffer_bindings.AddVertexBinding(DefaultBuffer(), 0); |
| 926 | continue; | 974 | continue; |
| 927 | } | 975 | } |
| 928 | const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); | 976 | const auto info = buffer_cache.UploadMemory(start, size); |
| 929 | buffer_bindings.AddVertexBinding(buffer, offset); | 977 | buffer_bindings.AddVertexBinding(info.handle, info.offset); |
| 930 | } | 978 | } |
| 931 | } | 979 | } |
| 932 | 980 | ||
| @@ -948,7 +996,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar | |||
| 948 | break; | 996 | break; |
| 949 | } | 997 | } |
| 950 | const GPUVAddr gpu_addr = regs.index_array.IndexStart(); | 998 | const GPUVAddr gpu_addr = regs.index_array.IndexStart(); |
| 951 | auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); | 999 | const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); |
| 1000 | VkBuffer buffer = info.handle; | ||
| 1001 | u64 offset = info.offset; | ||
| 952 | std::tie(buffer, offset) = quad_indexed_pass.Assemble( | 1002 | std::tie(buffer, offset) = quad_indexed_pass.Assemble( |
| 953 | regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); | 1003 | regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); |
| 954 | 1004 | ||
| @@ -962,7 +1012,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar | |||
| 962 | break; | 1012 | break; |
| 963 | } | 1013 | } |
| 964 | const GPUVAddr gpu_addr = regs.index_array.IndexStart(); | 1014 | const GPUVAddr gpu_addr = regs.index_array.IndexStart(); |
| 965 | auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); | 1015 | const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); |
| 1016 | VkBuffer buffer = info.handle; | ||
| 1017 | u64 offset = info.offset; | ||
| 966 | 1018 | ||
| 967 | auto format = regs.index_array.format; | 1019 | auto format = regs.index_array.format; |
| 968 | const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; | 1020 | const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; |
| @@ -1109,10 +1161,9 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, | |||
| 1109 | Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); | 1161 | Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); |
| 1110 | ASSERT(size <= MaxConstbufferSize); | 1162 | ASSERT(size <= MaxConstbufferSize); |
| 1111 | 1163 | ||
| 1112 | const auto [buffer_handle, offset] = | 1164 | const auto info = |
| 1113 | buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); | 1165 | buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); |
| 1114 | 1166 | update_descriptor_queue.AddBuffer(info.handle, info.offset, size); | |
| 1115 | update_descriptor_queue.AddBuffer(buffer_handle, offset, size); | ||
| 1116 | } | 1167 | } |
| 1117 | 1168 | ||
| 1118 | void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { | 1169 | void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { |
| @@ -1126,14 +1177,14 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd | |||
| 1126 | // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the | 1177 | // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the |
| 1127 | // default buffer. | 1178 | // default buffer. |
| 1128 | static constexpr std::size_t dummy_size = 4; | 1179 | static constexpr std::size_t dummy_size = 4; |
| 1129 | const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); | 1180 | const auto info = buffer_cache.GetEmptyBuffer(dummy_size); |
| 1130 | update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); | 1181 | update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); |
| 1131 | return; | 1182 | return; |
| 1132 | } | 1183 | } |
| 1133 | 1184 | ||
| 1134 | const auto [buffer, offset] = buffer_cache.UploadMemory( | 1185 | const auto info = buffer_cache.UploadMemory( |
| 1135 | actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); | 1186 | actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); |
| 1136 | update_descriptor_queue.AddBuffer(buffer, offset, size); | 1187 | update_descriptor_queue.AddBuffer(info.handle, info.offset, size); |
| 1137 | } | 1188 | } |
| 1138 | 1189 | ||
| 1139 | void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, | 1190 | void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, |
| @@ -1154,7 +1205,7 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu | |||
| 1154 | const auto sampler = sampler_cache.GetSampler(texture.tsc); | 1205 | const auto sampler = sampler_cache.GetSampler(texture.tsc); |
| 1155 | update_descriptor_queue.AddSampledImage(sampler, image_view); | 1206 | update_descriptor_queue.AddSampledImage(sampler, image_view); |
| 1156 | 1207 | ||
| 1157 | const auto image_layout = update_descriptor_queue.GetLastImageLayout(); | 1208 | VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout(); |
| 1158 | *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; | 1209 | *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; |
| 1159 | sampled_views.push_back(ImageView{std::move(view), image_layout}); | 1210 | sampled_views.push_back(ImageView{std::move(view), image_layout}); |
| 1160 | } | 1211 | } |
| @@ -1180,7 +1231,7 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima | |||
| 1180 | view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source); | 1231 | view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source); |
| 1181 | update_descriptor_queue.AddImage(image_view); | 1232 | update_descriptor_queue.AddImage(image_view); |
| 1182 | 1233 | ||
| 1183 | const auto image_layout = update_descriptor_queue.GetLastImageLayout(); | 1234 | VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout(); |
| 1184 | *image_layout = VK_IMAGE_LAYOUT_GENERAL; | 1235 | *image_layout = VK_IMAGE_LAYOUT_GENERAL; |
| 1185 | image_views.push_back(ImageView{std::move(view), image_layout}); | 1236 | image_views.push_back(ImageView{std::move(view), image_layout}); |
| 1186 | } | 1237 | } |
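The preserve-contents predicates added at the top of this file decide whether GetColorBufferSurface/GetDepthBufferSurface may skip reloading old texel data: a clear counts as a full overwrite only when every channel is cleared and the scissor, if enabled, covers the whole render target. The coverage test in isolation, with simplified stand-ins for the Maxwell register structs:

    #include <cstdint>

    struct Scissor {
        uint32_t min_x, min_y, max_x, max_y;
    };

    // Inverse of the checks above: true when the pending clear touches every
    // pixel of a rt_width x rt_height attachment, so nothing needs preserving.
    bool ClearCoversWholeTarget(bool scissor_enabled, const Scissor& scissor,
                                uint32_t rt_width, uint32_t rt_height) {
        if (!scissor_enabled) {
            return true; // No scissor: the clear is unrestricted.
        }
        return scissor.min_x == 0 && scissor.min_y == 0 &&
               scissor.max_x >= rt_width && scissor.max_y >= rt_height;
    }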
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index c8c187606..83e00e7e9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -159,7 +159,10 @@ private: | |||
| 159 | 159 | ||
| 160 | void FlushWork(); | 160 | void FlushWork(); |
| 161 | 161 | ||
| 162 | Texceptions UpdateAttachments(); | 162 | /// @brief Updates the currently bound attachments |
| 163 | /// @param is_clear True when the framebuffer is updated as a clear | ||
| 164 | /// @return Bitfield of attachments being used as sampled textures | ||
| 165 | Texceptions UpdateAttachments(bool is_clear); | ||
| 163 | 166 | ||
| 164 | std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass); | 167 | std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass); |
| 165 | 168 | ||
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 82ec9180e..56524e6f3 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <utility> | 9 | #include <utility> |
| 10 | 10 | ||
| 11 | #include "common/microprofile.h" | 11 | #include "common/microprofile.h" |
| 12 | #include "common/thread.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_device.h" | 13 | #include "video_core/renderer_vulkan/vk_device.h" |
| 13 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 14 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| 14 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 15 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| @@ -133,6 +134,7 @@ void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { | |||
| 133 | } | 134 | } |
| 134 | 135 | ||
| 135 | void VKScheduler::WorkerThread() { | 136 | void VKScheduler::WorkerThread() { |
| 137 | Common::SetCurrentThreadPriority(Common::ThreadPriority::High); | ||
| 136 | std::unique_lock lock{mutex}; | 138 | std::unique_lock lock{mutex}; |
| 137 | do { | 139 | do { |
| 138 | cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); | 140 | cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); |
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index c765c60a0..689f0d276 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h | |||
| @@ -35,10 +35,14 @@ public: | |||
| 35 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. | 35 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. |
| 36 | void Unmap(u64 size); | 36 | void Unmap(u64 size); |
| 37 | 37 | ||
| 38 | VkBuffer Handle() const { | 38 | VkBuffer Handle() const noexcept { |
| 39 | return *buffer; | 39 | return *buffer; |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | u64 Address() const noexcept { | ||
| 43 | return 0; | ||
| 44 | } | ||
| 45 | |||
| 42 | private: | 46 | private: |
| 43 | struct Watch final { | 47 | struct Watch final { |
| 44 | VKFenceWatch fence; | 48 | VKFenceWatch fence; |
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index 681ecde98..351c048d2 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp | |||
| @@ -24,35 +24,25 @@ void VKUpdateDescriptorQueue::TickFrame() { | |||
| 24 | } | 24 | } |
| 25 | 25 | ||
| 26 | void VKUpdateDescriptorQueue::Acquire() { | 26 | void VKUpdateDescriptorQueue::Acquire() { |
| 27 | entries.clear(); | 27 | // Minimum number of entries required. |
| 28 | } | 28 | // This is the maximum number of entries a single draw call might use. |
| 29 | static constexpr std::size_t MIN_ENTRIES = 0x400; | ||
| 29 | 30 | ||
| 30 | void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, | 31 | if (payload.size() + MIN_ENTRIES >= payload.max_size()) { |
| 31 | VkDescriptorSet set) { | ||
| 32 | if (payload.size() + entries.size() >= payload.max_size()) { | ||
| 33 | LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); | 32 | LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); |
| 34 | scheduler.WaitWorker(); | 33 | scheduler.WaitWorker(); |
| 35 | payload.clear(); | 34 | payload.clear(); |
| 36 | } | 35 | } |
| 36 | upload_start = &*payload.end(); | ||
| 37 | } | ||
| 37 | 38 | ||
| 38 | // TODO(Rodrigo): Rework to write the payload directly | 39 | void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, |
| 39 | const auto payload_start = payload.data() + payload.size(); | 40 | VkDescriptorSet set) { |
| 40 | for (const auto& entry : entries) { | 41 | const void* const data = upload_start; |
| 41 | if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) { | 42 | const vk::Device* const logical = &device.GetLogical(); |
| 42 | payload.push_back(*image); | 43 | scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) { |
| 43 | } else if (const auto buffer = std::get_if<VkDescriptorBufferInfo>(&entry)) { | 44 | logical->UpdateDescriptorSet(set, update_template, data); |
| 44 | payload.push_back(*buffer); | 45 | }); |
| 45 | } else if (const auto texel = std::get_if<VkBufferView>(&entry)) { | ||
| 46 | payload.push_back(*texel); | ||
| 47 | } else { | ||
| 48 | UNREACHABLE(); | ||
| 49 | } | ||
| 50 | } | ||
| 51 | |||
| 52 | scheduler.Record( | ||
| 53 | [payload_start, set, update_template, logical = &device.GetLogical()](vk::CommandBuffer) { | ||
| 54 | logical->UpdateDescriptorSet(set, update_template, payload_start); | ||
| 55 | }); | ||
| 56 | } | 46 | } |
| 57 | 47 | ||
| 58 | } // namespace Vulkan | 48 | } // namespace Vulkan |
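With the intermediate `entries` vector gone, callers write descriptor data straight into the payload between Acquire and Send. The expected per-draw call order, sketched as a free function templated on the queue type so the snippet stands alone (in-tree this is Vulkan::VKUpdateDescriptorQueue; the buffer range values are illustrative):

    #include <vulkan/vulkan.h>

    template <typename Queue>
    void PushDrawDescriptors(Queue& queue, VkBuffer buffer, VkSampler sampler,
                             VkImageView view, VkDescriptorUpdateTemplateKHR tmpl,
                             VkDescriptorSet set) {
        queue.Acquire();                      // reserve headroom, mark upload_start
        queue.AddBuffer(buffer, 0, 256);      // appends a VkDescriptorBufferInfo
        queue.AddSampledImage(sampler, view); // appends a VkDescriptorImageInfo
        *queue.LastImageLayout() = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        queue.Send(tmpl, set);                // records the deferred template update
    }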
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index cc7e3dff4..945320c72 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h | |||
| @@ -15,17 +15,13 @@ namespace Vulkan { | |||
| 15 | class VKDevice; | 15 | class VKDevice; |
| 16 | class VKScheduler; | 16 | class VKScheduler; |
| 17 | 17 | ||
| 18 | class DescriptorUpdateEntry { | 18 | struct DescriptorUpdateEntry { |
| 19 | public: | 19 | DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {} |
| 20 | explicit DescriptorUpdateEntry() {} | ||
| 21 | |||
| 22 | DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {} | ||
| 23 | 20 | ||
| 24 | DescriptorUpdateEntry(VkDescriptorBufferInfo buffer) : buffer{buffer} {} | 21 | DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {} |
| 25 | 22 | ||
| 26 | DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {} | 23 | DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {} |
| 27 | 24 | ||
| 28 | private: | ||
| 29 | union { | 25 | union { |
| 30 | VkDescriptorImageInfo image; | 26 | VkDescriptorImageInfo image; |
| 31 | VkDescriptorBufferInfo buffer; | 27 | VkDescriptorBufferInfo buffer; |
| @@ -45,32 +41,34 @@ public: | |||
| 45 | void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); | 41 | void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); |
| 46 | 42 | ||
| 47 | void AddSampledImage(VkSampler sampler, VkImageView image_view) { | 43 | void AddSampledImage(VkSampler sampler, VkImageView image_view) { |
| 48 | entries.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); | 44 | payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); |
| 49 | } | 45 | } |
| 50 | 46 | ||
| 51 | void AddImage(VkImageView image_view) { | 47 | void AddImage(VkImageView image_view) { |
| 52 | entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); | 48 | payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); |
| 53 | } | 49 | } |
| 54 | 50 | ||
| 55 | void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) { | 51 | void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) { |
| 56 | entries.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); | 52 | payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); |
| 57 | } | 53 | } |
| 58 | 54 | ||
| 59 | void AddTexelBuffer(VkBufferView texel_buffer) { | 55 | void AddTexelBuffer(VkBufferView texel_buffer) { |
| 60 | entries.emplace_back(texel_buffer); | 56 | payload.emplace_back(texel_buffer); |
| 61 | } | 57 | } |
| 62 | 58 | ||
| 63 | VkImageLayout* GetLastImageLayout() { | 59 | VkImageLayout* LastImageLayout() { |
| 64 | return &std::get<VkDescriptorImageInfo>(entries.back()).imageLayout; | 60 | return &payload.back().image.imageLayout; |
| 65 | } | 61 | } |
| 66 | 62 | ||
| 67 | private: | 63 | const VkImageLayout* LastImageLayout() const { |
| 68 | using Variant = std::variant<VkDescriptorImageInfo, VkDescriptorBufferInfo, VkBufferView>; | 64 | return &payload.back().image.imageLayout; |
| 65 | } | ||
| 69 | 66 | ||
| 67 | private: | ||
| 70 | const VKDevice& device; | 68 | const VKDevice& device; |
| 71 | VKScheduler& scheduler; | 69 | VKScheduler& scheduler; |
| 72 | 70 | ||
| 73 | boost::container::static_vector<Variant, 0x400> entries; | 71 | const DescriptorUpdateEntry* upload_start = nullptr; |
| 74 | boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload; | 72 | boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload; |
| 75 | }; | 73 | }; |
| 76 | 74 | ||
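A tagged variant is unnecessary here because descriptor update templates read each entry at a caller-chosen offset and stride (VkDescriptorUpdateTemplateEntry), so the queue only needs uniformly sized storage. A sketch of the sizing invariant the union relies on:

    #include <vulkan/vulkan.h>

    union EntrySketch {
        VkDescriptorImageInfo image;
        VkDescriptorBufferInfo buffer;
        VkBufferView texel_buffer;
    };

    // On common 64-bit ABIs the image and buffer info structs pad to the same
    // size, so one fixed stride covers every descriptor kind in the payload.
    static_assert(sizeof(EntrySketch) == sizeof(VkDescriptorBufferInfo),
                  "every entry fits in one fixed-size payload slot");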
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 2ce9b0626..0d485a662 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp | |||
| @@ -153,7 +153,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 153 | 153 | ||
| 154 | bool Load(InstanceDispatch& dld) noexcept { | 154 | bool Load(InstanceDispatch& dld) noexcept { |
| 155 | #define X(name) Proc(dld.name, dld, #name) | 155 | #define X(name) Proc(dld.name, dld, #name) |
| 156 | return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties); | 156 | return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties) && |
| 157 | X(vkEnumerateInstanceLayerProperties); | ||
| 157 | #undef X | 158 | #undef X |
| 158 | } | 159 | } |
| 159 | 160 | ||
| @@ -725,8 +726,7 @@ bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR s | |||
| 725 | return supported == VK_TRUE; | 726 | return supported == VK_TRUE; |
| 726 | } | 727 | } |
| 727 | 728 | ||
| 728 | VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const | 729 | VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const { |
| 729 | noexcept { | ||
| 730 | VkSurfaceCapabilitiesKHR capabilities; | 730 | VkSurfaceCapabilitiesKHR capabilities; |
| 731 | Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities)); | 731 | Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities)); |
| 732 | return capabilities; | 732 | return capabilities; |
| @@ -771,4 +771,17 @@ std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProp | |||
| 771 | return properties; | 771 | return properties; |
| 772 | } | 772 | } |
| 773 | 773 | ||
| 774 | std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties( | ||
| 775 | const InstanceDispatch& dld) { | ||
| 776 | u32 num; | ||
| 777 | if (dld.vkEnumerateInstanceLayerProperties(&num, nullptr) != VK_SUCCESS) { | ||
| 778 | return std::nullopt; | ||
| 779 | } | ||
| 780 | std::vector<VkLayerProperties> properties(num); | ||
| 781 | if (dld.vkEnumerateInstanceLayerProperties(&num, properties.data()) != VK_SUCCESS) { | ||
| 782 | return std::nullopt; | ||
| 783 | } | ||
| 784 | return properties; | ||
| 785 | } | ||
| 786 | |||
| 774 | } // namespace Vulkan::vk | 787 | } // namespace Vulkan::vk |
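The new wrapper follows the standard count-then-fill enumeration idiom and surfaces failure as std::nullopt, which the instance-creation code above uses to distinguish "loader query failed" from "zero layers installed". A call-site sketch, assuming an initialized dispatch table; the counting helper is illustrative:

    #include <cstddef>
    #include <optional>
    #include <vector>

    std::size_t CountInstanceLayers(const Vulkan::vk::InstanceDispatch& dld) {
        const std::optional layers = Vulkan::vk::EnumerateInstanceLayerProperties(dld);
        return layers ? layers->size() : 0; // nullopt: the loader query failed
    }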
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index 98937a77a..d56fdb3f9 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h | |||
| @@ -141,6 +141,7 @@ struct InstanceDispatch { | |||
| 141 | PFN_vkCreateInstance vkCreateInstance; | 141 | PFN_vkCreateInstance vkCreateInstance; |
| 142 | PFN_vkDestroyInstance vkDestroyInstance; | 142 | PFN_vkDestroyInstance vkDestroyInstance; |
| 143 | PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties; | 143 | PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties; |
| 144 | PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties; | ||
| 144 | 145 | ||
| 145 | PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT; | 146 | PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT; |
| 146 | PFN_vkCreateDevice vkCreateDevice; | 147 | PFN_vkCreateDevice vkCreateDevice; |
| @@ -779,7 +780,7 @@ public: | |||
| 779 | 780 | ||
| 780 | bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const; | 781 | bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const; |
| 781 | 782 | ||
| 782 | VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const noexcept; | 783 | VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const; |
| 783 | 784 | ||
| 784 | std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) const; | 785 | std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) const; |
| 785 | 786 | ||
| @@ -996,4 +997,7 @@ private: | |||
| 996 | std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties( | 997 | std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties( |
| 997 | const InstanceDispatch& dld); | 998 | const InstanceDispatch& dld); |
| 998 | 999 | ||
| 1000 | std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties( | ||
| 1001 | const InstanceDispatch& dld); | ||
| 1002 | |||
| 999 | } // namespace Vulkan::vk | 1003 | } // namespace Vulkan::vk |
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index 848e46874..b2e88fa20 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp | |||
| @@ -13,55 +13,101 @@ | |||
| 13 | 13 | ||
| 14 | namespace VideoCommon::Shader { | 14 | namespace VideoCommon::Shader { |
| 15 | 15 | ||
| 16 | using std::move; | ||
| 16 | using Tegra::Shader::Instruction; | 17 | using Tegra::Shader::Instruction; |
| 17 | using Tegra::Shader::OpCode; | 18 | using Tegra::Shader::OpCode; |
| 19 | using Tegra::Shader::PredCondition; | ||
| 18 | 20 | ||
| 19 | u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { | 21 | u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { |
| 20 | const Instruction instr = {program_code[pc]}; | 22 | const Instruction instr = {program_code[pc]}; |
| 21 | const auto opcode = OpCode::Decode(instr); | 23 | const auto opcode = OpCode::Decode(instr); |
| 22 | 24 | ||
| 23 | if (instr.hset2.ftz == 0) { | 25 | PredCondition cond; |
| 24 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | 26 | bool bf; |
| 27 | bool ftz; | ||
| 28 | bool neg_a; | ||
| 29 | bool abs_a; | ||
| 30 | bool neg_b; | ||
| 31 | bool abs_b; | ||
| 32 | switch (opcode->get().GetId()) { | ||
| 33 | case OpCode::Id::HSET2_C: | ||
| 34 | case OpCode::Id::HSET2_IMM: | ||
| 35 | cond = instr.hsetp2.cbuf_and_imm.cond; | ||
| 36 | bf = instr.Bit(53); | ||
| 37 | ftz = instr.Bit(54); | ||
| 38 | neg_a = instr.Bit(43); | ||
| 39 | abs_a = instr.Bit(44); | ||
| 40 | neg_b = instr.Bit(56); | ||
| 41 | abs_b = instr.Bit(54); | ||
| 42 | break; | ||
| 43 | case OpCode::Id::HSET2_R: | ||
| 44 | cond = instr.hsetp2.reg.cond; | ||
| 45 | bf = instr.Bit(49); | ||
| 46 | ftz = instr.Bit(50); | ||
| 47 | neg_a = instr.Bit(43); | ||
| 48 | abs_a = instr.Bit(44); | ||
| 49 | neg_b = instr.Bit(31); | ||
| 50 | abs_b = instr.Bit(30); | ||
| 51 | break; | ||
| 52 | default: | ||
| 53 | UNREACHABLE(); | ||
| 25 | } | 54 | } |
| 26 | 55 | ||
| 27 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); | 56 | Node op_b = [this, instr, opcode] { |
| 28 | op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a); | ||
| 29 | |||
| 30 | Node op_b = [&]() { | ||
| 31 | switch (opcode->get().GetId()) { | 57 | switch (opcode->get().GetId()) { |
| 58 | case OpCode::Id::HSET2_C: | ||
| 59 | // Report as unimplemented because this path is untested. | ||
| 60 | UNIMPLEMENTED_MSG("HSET2_C is not implemented"); | ||
| 61 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 32 | case OpCode::Id::HSET2_R: | 62 | case OpCode::Id::HSET2_R: |
| 33 | return GetRegister(instr.gpr20); | 63 | return GetRegister(instr.gpr20); |
| 64 | case OpCode::Id::HSET2_IMM: | ||
| 65 | return UnpackHalfImmediate(instr, true); | ||
| 34 | default: | 66 | default: |
| 35 | UNREACHABLE(); | 67 | UNREACHABLE(); |
| 36 | return Immediate(0); | 68 | return Node{}; |
| 37 | } | 69 | } |
| 38 | }(); | 70 | }(); |
| 39 | op_b = UnpackHalfFloat(op_b, instr.hset2.type_b); | ||
| 40 | op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b); | ||
| 41 | 71 | ||
| 42 | const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); | 72 | if (!ftz) { |
| 73 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 74 | } | ||
| 75 | |||
| 76 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); | ||
| 77 | op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a); | ||
| 78 | |||
| 79 | switch (opcode->get().GetId()) { | ||
| 80 | case OpCode::Id::HSET2_R: | ||
| 81 | op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b); | ||
| 82 | [[fallthrough]]; | ||
| 83 | case OpCode::Id::HSET2_C: | ||
| 84 | op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b); | ||
| 85 | break; | ||
| 86 | default: | ||
| 87 | break; | ||
| 88 | } | ||
| 43 | 89 | ||
| 44 | const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b); | 90 | Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); |
| 91 | |||
| 92 | Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b); | ||
| 45 | 93 | ||
| 46 | const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); | 94 | const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); |
| 47 | 95 | ||
| 48 | // HSET2 operates on each half float in the pack. | 96 | // HSET2 operates on each half float in the pack. |
| 49 | std::array<Node, 2> values; | 97 | std::array<Node, 2> values; |
| 50 | for (u32 i = 0; i < 2; ++i) { | 98 | for (u32 i = 0; i < 2; ++i) { |
| 51 | const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff; | 99 | const u32 raw_value = bf ? 0x3c00 : 0xffff; |
| 52 | const Node true_value = Immediate(raw_value << (i * 16)); | 100 | Node true_value = Immediate(raw_value << (i * 16)); |
| 53 | const Node false_value = Immediate(0); | 101 | Node false_value = Immediate(0); |
| 54 | |||
| 55 | const Node comparison = | ||
| 56 | Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); | ||
| 57 | const Node predicate = Operation(combiner, comparison, second_pred); | ||
| 58 | 102 | ||
| 103 | Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); | ||
| 104 | Node predicate = Operation(combiner, comparison, second_pred); | ||
| 59 | values[i] = | 105 | values[i] = |
| 60 | Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value); | 106 | Operation(OperationCode::Select, predicate, move(true_value), move(false_value)); |
| 61 | } | 107 | } |
| 62 | 108 | ||
| 63 | const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]); | 109 | Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]); |
| 64 | SetRegister(bb, instr.gpr0, value); | 110 | SetRegister(bb, instr.gpr0, move(value)); |
| 65 | 111 | ||
| 66 | return pc; | 112 | return pc; |
| 67 | } | 113 | } |
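The rewritten decoder keeps HSET2's per-lane result packing: a passing comparison writes the all-ones mask 0xffff into its 16-bit lane, or 0x3c00 when the boolean-float (bf) bit is set, 0x3c00 being 1.0 in IEEE-754 binary16. A standalone illustration of the Select/UBitwiseOr combination at the end of the function, with made-up lane results:

#include <cstdint>
#include <cstdio>

// Illustration only: how the decoded HSET2 selects per-lane results and ORs
// them into one 32-bit register, mirroring the Select/UBitwiseOr nodes above.
// 0x3c00 is 1.0 in binary16; 0xffff is the all-ones "true" mask.
std::uint32_t PackHset2Result(bool lane0, bool lane1, bool bf) {
    const std::uint32_t raw_value = bf ? 0x3c00 : 0xffff;
    const std::uint32_t low = lane0 ? raw_value : 0;          // bits 15:0
    const std::uint32_t high = lane1 ? (raw_value << 16) : 0; // bits 31:16
    return low | high;
}

int main() {
    // With bf set, a (true, false) comparison pair yields 1.0h in the low half.
    std::printf("0x%08lx\n",
                static_cast<unsigned long>(PackHset2Result(true, false, true))); // 0x00003c00
}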
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 60b6ad72a..07778dc3e 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp | |||
| @@ -97,6 +97,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, | |||
| 97 | break; | 97 | break; |
| 98 | case TextureFormat::B5G6R5: | 98 | case TextureFormat::B5G6R5: |
| 99 | case TextureFormat::B6G5R5: | 99 | case TextureFormat::B6G5R5: |
| 100 | case TextureFormat::BF10GF11RF11: | ||
| 100 | if (component == 0) { | 101 | if (component == 0) { |
| 101 | return descriptor.b_type; | 102 | return descriptor.b_type; |
| 102 | } | 103 | } |
| @@ -119,7 +120,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, | |||
| 119 | } | 120 | } |
| 120 | break; | 121 | break; |
| 121 | } | 122 | } |
| 122 | UNIMPLEMENTED_MSG("texture format not implement={}", format); | 123 | UNIMPLEMENTED_MSG("Texture format not implemented={}", format); |
| 123 | return ComponentType::FLOAT; | 124 | return ComponentType::FLOAT; |
| 124 | } | 125 | } |
| 125 | 126 | ||
| @@ -191,6 +192,14 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { | |||
| 191 | return 6; | 192 | return 6; |
| 192 | } | 193 | } |
| 193 | return 0; | 194 | return 0; |
| 195 | case TextureFormat::BF10GF11RF11: | ||
| 196 | if (component == 1 || component == 2) { | ||
| 197 | return 11; | ||
| 198 | } | ||
| 199 | if (component == 0) { | ||
| 200 | return 10; | ||
| 201 | } | ||
| 202 | return 0; | ||
| 194 | case TextureFormat::G8R24: | 203 | case TextureFormat::G8R24: |
| 195 | if (component == 0) { | 204 | if (component == 0) { |
| 196 | return 8; | 205 | return 8; |
| @@ -211,10 +220,9 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { | |||
| 211 | return (component == 0 || component == 1) ? 8 : 0; | 220 | return (component == 0 || component == 1) ? 8 : 0; |
| 212 | case TextureFormat::G4R4: | 221 | case TextureFormat::G4R4: |
| 213 | return (component == 0 || component == 1) ? 4 : 0; | 222 | return (component == 0 || component == 1) ? 4 : 0; |
| 214 | default: | ||
| 215 | UNIMPLEMENTED_MSG("texture format not implement={}", format); | ||
| 216 | return 0; | ||
| 217 | } | 223 | } |
| 224 | UNIMPLEMENTED_MSG("Texture format not implemented={}", format); | ||
| 225 | return 0; | ||
| 218 | } | 226 | } |
| 219 | 227 | ||
| 220 | std::size_t GetImageComponentMask(TextureFormat format) { | 228 | std::size_t GetImageComponentMask(TextureFormat format) { |
| @@ -235,6 +243,7 @@ std::size_t GetImageComponentMask(TextureFormat format) { | |||
| 235 | case TextureFormat::R32_B24G8: | 243 | case TextureFormat::R32_B24G8: |
| 236 | case TextureFormat::B5G6R5: | 244 | case TextureFormat::B5G6R5: |
| 237 | case TextureFormat::B6G5R5: | 245 | case TextureFormat::B6G5R5: |
| 246 | case TextureFormat::BF10GF11RF11: | ||
| 238 | return std::size_t{R | G | B}; | 247 | return std::size_t{R | G | B}; |
| 239 | case TextureFormat::R32_G32: | 248 | case TextureFormat::R32_G32: |
| 240 | case TextureFormat::R16_G16: | 249 | case TextureFormat::R16_G16: |
| @@ -248,10 +257,9 @@ std::size_t GetImageComponentMask(TextureFormat format) { | |||
| 248 | case TextureFormat::R8: | 257 | case TextureFormat::R8: |
| 249 | case TextureFormat::R1: | 258 | case TextureFormat::R1: |
| 250 | return std::size_t{R}; | 259 | return std::size_t{R}; |
| 251 | default: | ||
| 252 | UNIMPLEMENTED_MSG("texture format not implement={}", format); | ||
| 253 | return std::size_t{R | G | B | A}; | ||
| 254 | } | 260 | } |
| 261 | UNIMPLEMENTED_MSG("Texture format not implemented={}", format); | ||
| 262 | return std::size_t{R | G | B | A}; | ||
| 255 | } | 263 | } |
| 256 | 264 | ||
| 257 | std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { | 265 | std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { |
| @@ -299,7 +307,7 @@ std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, | |||
| 299 | return {std::move(original_value), true}; | 307 | return {std::move(original_value), true}; |
| 300 | } | 308 | } |
| 301 | default: | 309 | default: |
| 302 | UNIMPLEMENTED_MSG("Unimplement component type={}", component_type); | 310 | UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type); |
| 303 | return {std::move(original_value), true}; | 311 | return {std::move(original_value), true}; |
| 304 | } | 312 | } |
| 305 | } | 313 | } |
| @@ -459,7 +467,7 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | |||
| 459 | default: | 467 | default: |
| 460 | break; | 468 | break; |
| 461 | } | 469 | } |
| 462 | UNIMPLEMENTED_MSG("Unimplemented operation={} type={}", | 470 | UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}", |
| 463 | static_cast<u64>(instr.suatom_d.operation.Value()), | 471 | static_cast<u64>(instr.suatom_d.operation.Value()), |
| 464 | static_cast<u64>(instr.suatom_d.operation_type.Value())); | 472 | static_cast<u64>(instr.suatom_d.operation_type.Value())); |
| 465 | return OperationCode::AtomicImageAdd; | 473 | return OperationCode::AtomicImageAdd; |
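BF10GF11RF11 is the packed unsigned small-float color format: 11-bit red and green fields plus a 10-bit blue field and no alpha, which matches the component sizes and the R|G|B mask added above. A sketch of splitting one texel into its raw fields, assuming the conventional red-in-low-bits layout; the helper below is illustrative, not from the patch:

#include <cstdint>

// Raw bit fields of one BF10GF11RF11 texel; all three are unsigned floats
// (5-bit exponent, no sign bit), so the bit counts sum to 11 + 11 + 10 = 32.
struct Bf10Gf11Rf11 {
    std::uint32_t r; // bits 10:0, 11-bit float
    std::uint32_t g; // bits 21:11, 11-bit float
    std::uint32_t b; // bits 31:22, 10-bit float
};

Bf10Gf11Rf11 UnpackBf10Gf11Rf11(std::uint32_t texel) {
    Bf10Gf11Rf11 result;
    result.r = texel & 0x7ff;
    result.g = (texel >> 11) & 0x7ff;
    result.b = (texel >> 22) & 0x3ff;
    return result;
}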
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 94d3a6ae5..0caf3b4f0 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp | |||
| @@ -120,6 +120,9 @@ std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap( | |||
| 120 | } | 120 | } |
| 121 | const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)}; | 121 | const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)}; |
| 122 | const auto layer{static_cast<u32>(relative_address / layer_size)}; | 122 | const auto layer{static_cast<u32>(relative_address / layer_size)}; |
| 123 | if (layer >= params.depth) { | ||
| 124 | return {}; | ||
| 125 | } | ||
| 123 | const GPUVAddr mipmap_address = relative_address - layer_size * layer; | 126 | const GPUVAddr mipmap_address = relative_address - layer_size * layer; |
| 124 | const auto mipmap_it = | 127 | const auto mipmap_it = |
| 125 | Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); | 128 | Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); |
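The new guard in GetLayerMipmap rejects candidates whose computed layer index lands past the array depth; previously such an address produced an out-of-range layer and a meaningless mipmap lookup. The arithmetic, as a compile-time illustration with made-up sizes:

#include <cstdint>

// Hypothetical numbers: a 2-layer array surface with 0x10000-byte layers.
constexpr std::uint64_t layer_size = 0x10000;
constexpr std::uint32_t depth = 2;

// A candidate 0x25000 bytes past gpu_addr computes layer 2, one past the end.
constexpr auto layer = static_cast<std::uint32_t>(std::uint64_t{0x25000} / layer_size);
static_assert(layer >= depth, "exactly the case the new check rejects");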
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b543fc8c0..6207d8dfe 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include "core/core.h" | 24 | #include "core/core.h" |
| 25 | #include "core/memory.h" | 25 | #include "core/memory.h" |
| 26 | #include "core/settings.h" | 26 | #include "core/settings.h" |
| 27 | #include "video_core/compatible_formats.h" | ||
| 27 | #include "video_core/dirty_flags.h" | 28 | #include "video_core/dirty_flags.h" |
| 28 | #include "video_core/engines/fermi_2d.h" | 29 | #include "video_core/engines/fermi_2d.h" |
| 29 | #include "video_core/engines/maxwell_3d.h" | 30 | #include "video_core/engines/maxwell_3d.h" |
| @@ -47,8 +48,8 @@ class RasterizerInterface; | |||
| 47 | 48 | ||
| 48 | namespace VideoCommon { | 49 | namespace VideoCommon { |
| 49 | 50 | ||
| 51 | using VideoCore::Surface::FormatCompatibility; | ||
| 50 | using VideoCore::Surface::PixelFormat; | 52 | using VideoCore::Surface::PixelFormat; |
| 51 | |||
| 52 | using VideoCore::Surface::SurfaceTarget; | 53 | using VideoCore::Surface::SurfaceTarget; |
| 53 | using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; | 54 | using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; |
| 54 | 55 | ||
| @@ -595,7 +596,7 @@ private: | |||
| 595 | } else { | 596 | } else { |
| 596 | new_surface = GetUncachedSurface(gpu_addr, params); | 597 | new_surface = GetUncachedSurface(gpu_addr, params); |
| 597 | } | 598 | } |
| 598 | const auto& final_params = new_surface->GetSurfaceParams(); | 599 | const SurfaceParams& final_params = new_surface->GetSurfaceParams(); |
| 599 | if (cr_params.type != final_params.type) { | 600 | if (cr_params.type != final_params.type) { |
| 600 | if (Settings::IsGPULevelExtreme()) { | 601 | if (Settings::IsGPULevelExtreme()) { |
| 601 | BufferCopy(current_surface, new_surface); | 602 | BufferCopy(current_surface, new_surface); |
| @@ -603,7 +604,7 @@ private: | |||
| 603 | } else { | 604 | } else { |
| 604 | std::vector<CopyParams> bricks = current_surface->BreakDown(final_params); | 605 | std::vector<CopyParams> bricks = current_surface->BreakDown(final_params); |
| 605 | for (auto& brick : bricks) { | 606 | for (auto& brick : bricks) { |
| 606 | ImageCopy(current_surface, new_surface, brick); | 607 | TryCopyImage(current_surface, new_surface, brick); |
| 607 | } | 608 | } |
| 608 | } | 609 | } |
| 609 | Unregister(current_surface); | 610 | Unregister(current_surface); |
| @@ -694,7 +695,7 @@ private: | |||
| 694 | } | 695 | } |
| 695 | const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height, | 696 | const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height, |
| 696 | src_params.depth); | 697 | src_params.depth); |
| 697 | ImageCopy(surface, new_surface, copy_params); | 698 | TryCopyImage(surface, new_surface, copy_params); |
| 698 | } | 699 | } |
| 699 | } | 700 | } |
| 700 | if (passed_tests == 0) { | 701 | if (passed_tests == 0) { |
| @@ -791,7 +792,7 @@ private: | |||
| 791 | const u32 width = params.width; | 792 | const u32 width = params.width; |
| 792 | const u32 height = params.height; | 793 | const u32 height = params.height; |
| 793 | const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); | 794 | const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); |
| 794 | ImageCopy(surface, new_surface, copy_params); | 795 | TryCopyImage(surface, new_surface, copy_params); |
| 795 | } | 796 | } |
| 796 | for (const auto& surface : overlaps) { | 797 | for (const auto& surface : overlaps) { |
| 797 | Unregister(surface); | 798 | Unregister(surface); |
| @@ -1053,7 +1054,7 @@ private: | |||
| 1053 | void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, | 1054 | void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, |
| 1054 | const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { | 1055 | const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { |
| 1055 | auto deduced_src = DeduceSurface(src_gpu_addr, src_params); | 1056 | auto deduced_src = DeduceSurface(src_gpu_addr, src_params); |
| 1056 | auto deduced_dst = DeduceSurface(src_gpu_addr, src_params); | 1057 | auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params); |
| 1057 | if (deduced_src.Failed() || deduced_dst.Failed()) { | 1058 | if (deduced_src.Failed() || deduced_dst.Failed()) { |
| 1058 | return; | 1059 | return; |
| 1059 | } | 1060 | } |
| @@ -1192,6 +1193,19 @@ private: | |||
| 1192 | return {}; | 1193 | return {}; |
| 1193 | } | 1194 | } |
| 1194 | 1195 | ||
| 1196 | /// Attempt an image copy, logging when the formats are incompatible. | ||
| 1197 | void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) { | ||
| 1198 | const SurfaceParams& src_params = src->GetSurfaceParams(); | ||
| 1199 | const SurfaceParams& dst_params = dst->GetSurfaceParams(); | ||
| 1200 | if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) { | ||
| 1201 | LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}", | ||
| 1202 | static_cast<int>(dst_params.pixel_format), | ||
| 1203 | static_cast<int>(src_params.pixel_format)); | ||
| 1204 | return; | ||
| 1205 | } | ||
| 1206 | ImageCopy(src, dst, copy); | ||
| 1207 | } | ||
| 1208 | |||
| 1195 | constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { | 1209 | constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { |
| 1196 | return siblings_table[static_cast<std::size_t>(format)]; | 1210 | return siblings_table[static_cast<std::size_t>(format)]; |
| 1197 | } | 1211 | } |
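TryCopyImage funnels every surface copy through the new FormatCompatibility table, logging and dropping pairs the table rejects instead of handing them to the backend ImageCopy. compatible_formats.h itself is not shown in this section, so the following is only a sketch of the kind of symmetric lookup TestCopy implies:

#include <array>
#include <bitset>
#include <cstddef>

// Assumed shape, not the real class: a per-format bitset seeded from
// copy-compatibility classes, queried symmetrically by TestCopy.
class FormatCompatibilitySketch {
    static constexpr std::size_t MaxPixelFormat = 256; // placeholder count

public:
    void EnableCopy(std::size_t format_a, std::size_t format_b) {
        copy_table[format_a].set(format_b);
        copy_table[format_b].set(format_a); // copy compatibility is symmetric
    }

    bool TestCopy(std::size_t format_a, std::size_t format_b) const {
        return copy_table[format_a].test(format_b);
    }

private:
    std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat> copy_table{};
};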
| @@ -1241,6 +1255,7 @@ private: | |||
| 1241 | VideoCore::RasterizerInterface& rasterizer; | 1255 | VideoCore::RasterizerInterface& rasterizer; |
| 1242 | 1256 | ||
| 1243 | FormatLookupTable format_lookup_table; | 1257 | FormatLookupTable format_lookup_table; |
| 1258 | FormatCompatibility format_compatibility; | ||
| 1244 | 1259 | ||
| 1245 | u64 ticks{}; | 1260 | u64 ticks{}; |
| 1246 | 1261 | ||