summaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt4
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h53
-rw-r--r--src/video_core/compatible_formats.cpp162
-rw-r--r--src/video_core/compatible_formats.h32
-rw-r--r--src/video_core/engines/maxwell_3d.cpp2
-rw-r--r--src/video_core/engines/maxwell_3d.h8
-rw-r--r--src/video_core/engines/shader_bytecode.h8
-rw-r--r--src/video_core/gpu.cpp5
-rw-r--r--src/video_core/gpu.h6
-rw-r--r--src/video_core/gpu_asynch.cpp9
-rw-r--r--src/video_core/gpu_asynch.h2
-rw-r--r--src/video_core/gpu_synch.cpp8
-rw-r--r--src/video_core/gpu_synch.h2
-rw-r--r--src/video_core/gpu_thread.cpp7
-rw-r--r--src/video_core/macro/macro.cpp60
-rw-r--r--src/video_core/macro/macro.h19
-rw-r--r--src/video_core/macro/macro_hle.cpp113
-rw-r--r--src/video_core/macro/macro_hle.h44
-rw-r--r--src/video_core/macro/macro_interpreter.cpp3
-rw-r--r--src/video_core/macro/macro_jit_x64.cpp7
-rw-r--r--src/video_core/memory_manager.cpp40
-rw-r--r--src/video_core/memory_manager.h12
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp69
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h37
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_device.h10
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp75
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp50
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp56
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.h16
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h44
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp17
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h3
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp144
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp28
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp89
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h29
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp95
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.cpp36
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.h32
-rw-r--r--src/video_core/renderer_vulkan/wrapper.cpp19
-rw-r--r--src/video_core/renderer_vulkan/wrapper.h6
-rw-r--r--src/video_core/shader/decode/half_set.cpp88
-rw-r--r--src/video_core/shader/decode/image.cpp26
-rw-r--r--src/video_core/texture_cache/surface_base.cpp3
-rw-r--r--src/video_core/texture_cache/texture_cache.h27
51 files changed, 1164 insertions, 479 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 099bb446e..21c46a567 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -3,6 +3,8 @@ add_library(video_core STATIC
3 buffer_cache/buffer_cache.h 3 buffer_cache/buffer_cache.h
4 buffer_cache/map_interval.cpp 4 buffer_cache/map_interval.cpp
5 buffer_cache/map_interval.h 5 buffer_cache/map_interval.h
6 compatible_formats.cpp
7 compatible_formats.h
6 dirty_flags.cpp 8 dirty_flags.cpp
7 dirty_flags.h 9 dirty_flags.h
8 dma_pusher.cpp 10 dma_pusher.cpp
@@ -27,6 +29,8 @@ add_library(video_core STATIC
27 engines/shader_type.h 29 engines/shader_type.h
28 macro/macro.cpp 30 macro/macro.cpp
29 macro/macro.h 31 macro/macro.h
32 macro/macro_hle.cpp
33 macro/macro_hle.h
30 macro/macro_interpreter.cpp 34 macro/macro_interpreter.cpp
31 macro/macro_interpreter.h 35 macro/macro_interpreter.h
32 macro/macro_jit_x64.cpp 36 macro/macro_jit_x64.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 308d8b55f..c6479af9f 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -41,16 +41,20 @@ class BufferCache {
41 static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS; 41 static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
42 42
43public: 43public:
44 using BufferInfo = std::pair<BufferType, u64>; 44 struct BufferInfo {
45 BufferType handle;
46 u64 offset;
47 u64 address;
48 };
45 49
46 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, 50 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
47 bool is_written = false, bool use_fast_cbuf = false) { 51 bool is_written = false, bool use_fast_cbuf = false) {
48 std::lock_guard lock{mutex}; 52 std::lock_guard lock{mutex};
49 53
50 const auto& memory_manager = system.GPU().MemoryManager(); 54 auto& memory_manager = system.GPU().MemoryManager();
51 const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); 55 const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
52 if (!cpu_addr_opt) { 56 if (!cpu_addr_opt) {
53 return {GetEmptyBuffer(size), 0}; 57 return GetEmptyBuffer(size);
54 } 58 }
55 const VAddr cpu_addr = *cpu_addr_opt; 59 const VAddr cpu_addr = *cpu_addr_opt;
56 60
@@ -59,7 +63,6 @@ public:
59 constexpr std::size_t max_stream_size = 0x800; 63 constexpr std::size_t max_stream_size = 0x800;
60 if (use_fast_cbuf || size < max_stream_size) { 64 if (use_fast_cbuf || size < max_stream_size) {
61 if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { 65 if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
62 auto& memory_manager = system.GPU().MemoryManager();
63 const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size); 66 const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size);
64 if (use_fast_cbuf) { 67 if (use_fast_cbuf) {
65 u8* dest; 68 u8* dest;
@@ -89,7 +92,7 @@ public:
89 Buffer* const block = GetBlock(cpu_addr, size); 92 Buffer* const block = GetBlock(cpu_addr, size);
90 MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); 93 MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
91 if (!map) { 94 if (!map) {
92 return {GetEmptyBuffer(size), 0}; 95 return GetEmptyBuffer(size);
93 } 96 }
94 if (is_written) { 97 if (is_written) {
95 map->MarkAsModified(true, GetModifiedTicks()); 98 map->MarkAsModified(true, GetModifiedTicks());
@@ -102,7 +105,7 @@ public:
102 } 105 }
103 } 106 }
104 107
105 return {block->Handle(), static_cast<u64>(block->Offset(cpu_addr))}; 108 return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()};
106 } 109 }
107 110
108 /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. 111 /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
@@ -255,27 +258,17 @@ public:
255 committed_flushes.pop_front(); 258 committed_flushes.pop_front();
256 } 259 }
257 260
258 virtual BufferType GetEmptyBuffer(std::size_t size) = 0; 261 virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
259 262
260protected: 263protected:
261 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, 264 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
262 std::unique_ptr<StreamBuffer> stream_buffer_) 265 std::unique_ptr<StreamBuffer> stream_buffer)
263 : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer_)}, 266 : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {}
264 stream_buffer_handle{stream_buffer->Handle()} {}
265 267
266 ~BufferCache() = default; 268 ~BufferCache() = default;
267 269
268 virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0; 270 virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
269 271
270 virtual void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
271 const u8* data) = 0;
272
273 virtual void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
274 u8* data) = 0;
275
276 virtual void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
277 std::size_t dst_offset, std::size_t size) = 0;
278
279 virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { 272 virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
280 return {}; 273 return {};
281 } 274 }
@@ -329,19 +322,18 @@ protected:
329 } 322 }
330 323
331private: 324private:
332 MapInterval* MapAddress(const Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, 325 MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
333 std::size_t size) {
334 const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); 326 const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
335 if (overlaps.empty()) { 327 if (overlaps.empty()) {
336 auto& memory_manager = system.GPU().MemoryManager(); 328 auto& memory_manager = system.GPU().MemoryManager();
337 const VAddr cpu_addr_end = cpu_addr + size; 329 const VAddr cpu_addr_end = cpu_addr + size;
338 if (memory_manager.IsGranularRange(gpu_addr, size)) { 330 if (memory_manager.IsGranularRange(gpu_addr, size)) {
339 u8* host_ptr = memory_manager.GetPointer(gpu_addr); 331 u8* host_ptr = memory_manager.GetPointer(gpu_addr);
340 UploadBlockData(*block, block->Offset(cpu_addr), size, host_ptr); 332 block->Upload(block->Offset(cpu_addr), size, host_ptr);
341 } else { 333 } else {
342 staging_buffer.resize(size); 334 staging_buffer.resize(size);
343 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); 335 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
344 UploadBlockData(*block, block->Offset(cpu_addr), size, staging_buffer.data()); 336 block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
345 } 337 }
346 return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); 338 return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
347 } 339 }
@@ -384,8 +376,7 @@ private:
384 return map; 376 return map;
385 } 377 }
386 378
387 void UpdateBlock(const Buffer* block, VAddr start, VAddr end, 379 void UpdateBlock(Buffer* block, VAddr start, VAddr end, const VectorMapInterval& overlaps) {
388 const VectorMapInterval& overlaps) {
389 const IntervalType base_interval{start, end}; 380 const IntervalType base_interval{start, end};
390 IntervalSet interval_set{}; 381 IntervalSet interval_set{};
391 interval_set.add(base_interval); 382 interval_set.add(base_interval);
@@ -400,7 +391,7 @@ private:
400 } 391 }
401 staging_buffer.resize(size); 392 staging_buffer.resize(size);
402 system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); 393 system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
403 UploadBlockData(*block, block->Offset(interval.lower()), size, staging_buffer.data()); 394 block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
404 } 395 }
405 } 396 }
406 397
@@ -437,7 +428,7 @@ private:
437 428
438 const std::size_t size = map->end - map->start; 429 const std::size_t size = map->end - map->start;
439 staging_buffer.resize(size); 430 staging_buffer.resize(size);
440 DownloadBlockData(*block, block->Offset(map->start), size, staging_buffer.data()); 431 block->Download(block->Offset(map->start), size, staging_buffer.data());
441 system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); 432 system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
442 map->MarkAsModified(false, 0); 433 map->MarkAsModified(false, 0);
443 } 434 }
@@ -450,7 +441,7 @@ private:
450 441
451 buffer_ptr += size; 442 buffer_ptr += size;
452 buffer_offset += size; 443 buffer_offset += size;
453 return {stream_buffer_handle, uploaded_offset}; 444 return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
454 } 445 }
455 446
456 void AlignBuffer(std::size_t alignment) { 447 void AlignBuffer(std::size_t alignment) {
@@ -465,7 +456,7 @@ private:
465 const std::size_t new_size = old_size + BLOCK_PAGE_SIZE; 456 const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
466 const VAddr cpu_addr = buffer->CpuAddr(); 457 const VAddr cpu_addr = buffer->CpuAddr();
467 std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size); 458 std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
468 CopyBlock(*buffer, *new_buffer, 0, 0, old_size); 459 new_buffer->CopyFrom(*buffer, 0, 0, old_size);
469 QueueDestruction(std::move(buffer)); 460 QueueDestruction(std::move(buffer));
470 461
471 const VAddr cpu_addr_end = cpu_addr + new_size - 1; 462 const VAddr cpu_addr_end = cpu_addr + new_size - 1;
@@ -487,8 +478,8 @@ private:
487 const std::size_t new_size = size_1 + size_2; 478 const std::size_t new_size = size_1 + size_2;
488 479
489 std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size); 480 std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
490 CopyBlock(*first, *new_buffer, 0, new_buffer->Offset(first_addr), size_1); 481 new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1);
491 CopyBlock(*second, *new_buffer, 0, new_buffer->Offset(second_addr), size_2); 482 new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);
492 QueueDestruction(std::move(first)); 483 QueueDestruction(std::move(first));
493 QueueDestruction(std::move(second)); 484 QueueDestruction(std::move(second));
494 485
diff --git a/src/video_core/compatible_formats.cpp b/src/video_core/compatible_formats.cpp
new file mode 100644
index 000000000..6c426b035
--- /dev/null
+++ b/src/video_core/compatible_formats.cpp
@@ -0,0 +1,162 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <bitset>
7#include <cstddef>
8
9#include "video_core/compatible_formats.h"
10#include "video_core/surface.h"
11
12namespace VideoCore::Surface {
13
14namespace {
15
16// Compatibility table taken from Table 3.X.2 in:
17// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt
18
19constexpr std::array VIEW_CLASS_128_BITS = {
20 PixelFormat::RGBA32F,
21 PixelFormat::RGBA32UI,
22};
23// Missing formats:
24// PixelFormat::RGBA32I
25
26constexpr std::array VIEW_CLASS_96_BITS = {
27 PixelFormat::RGB32F,
28};
29// Missing formats:
30// PixelFormat::RGB32UI,
31// PixelFormat::RGB32I,
32
33constexpr std::array VIEW_CLASS_64_BITS = {
34 PixelFormat::RGBA16F, PixelFormat::RG32F, PixelFormat::RGBA16UI, PixelFormat::RG32UI,
35 PixelFormat::RGBA16U, PixelFormat::RGBA16F, PixelFormat::RGBA16S,
36};
37// Missing formats:
38// PixelFormat::RGBA16I
39// PixelFormat::RG32I
40
41// TODO: How should we handle 48 bits?
42
43constexpr std::array VIEW_CLASS_32_BITS = {
44 PixelFormat::RG16F, PixelFormat::R11FG11FB10F, PixelFormat::R32F,
45 PixelFormat::A2B10G10R10U, PixelFormat::RG16UI, PixelFormat::R32UI,
46 PixelFormat::RG16I, PixelFormat::R32I, PixelFormat::ABGR8U,
47 PixelFormat::RG16, PixelFormat::ABGR8S, PixelFormat::RG16S,
48 PixelFormat::RGBA8_SRGB, PixelFormat::E5B9G9R9F, PixelFormat::BGRA8,
49 PixelFormat::BGRA8_SRGB,
50};
51// Missing formats:
52// PixelFormat::RGBA8UI
53// PixelFormat::RGBA8I
54// PixelFormat::RGB10_A2_UI
55
56// TODO: How should we handle 24 bits?
57
58constexpr std::array VIEW_CLASS_16_BITS = {
59 PixelFormat::R16F, PixelFormat::RG8UI, PixelFormat::R16UI, PixelFormat::R16I,
60 PixelFormat::RG8U, PixelFormat::R16U, PixelFormat::RG8S, PixelFormat::R16S,
61};
62// Missing formats:
63// PixelFormat::RG8I
64
65constexpr std::array VIEW_CLASS_8_BITS = {
66 PixelFormat::R8UI,
67 PixelFormat::R8U,
68};
69// Missing formats:
70// PixelFormat::R8I
71// PixelFormat::R8S
72
73constexpr std::array VIEW_CLASS_RGTC1_RED = {
74 PixelFormat::DXN1,
75};
76// Missing formats:
77// COMPRESSED_SIGNED_RED_RGTC1
78
79constexpr std::array VIEW_CLASS_RGTC2_RG = {
80 PixelFormat::DXN2UNORM,
81 PixelFormat::DXN2SNORM,
82};
83
84constexpr std::array VIEW_CLASS_BPTC_UNORM = {
85 PixelFormat::BC7U,
86 PixelFormat::BC7U_SRGB,
87};
88
89constexpr std::array VIEW_CLASS_BPTC_FLOAT = {
90 PixelFormat::BC6H_SF16,
91 PixelFormat::BC6H_UF16,
92};
93
94// Compatibility table taken from Table 4.X.1 in:
95// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt
96
97constexpr std::array COPY_CLASS_128_BITS = {
98 PixelFormat::RGBA32UI, PixelFormat::RGBA32F, PixelFormat::DXT23,
99 PixelFormat::DXT23_SRGB, PixelFormat::DXT45, PixelFormat::DXT45_SRGB,
100 PixelFormat::DXN2SNORM, PixelFormat::BC7U, PixelFormat::BC7U_SRGB,
101 PixelFormat::BC6H_SF16, PixelFormat::BC6H_UF16,
102};
103// Missing formats:
104// PixelFormat::RGBA32I
105// COMPRESSED_RG_RGTC2
106
107constexpr std::array COPY_CLASS_64_BITS = {
108 PixelFormat::RGBA16F, PixelFormat::RG32F, PixelFormat::RGBA16UI, PixelFormat::RG32UI,
109 PixelFormat::RGBA16U, PixelFormat::RGBA16S, PixelFormat::DXT1_SRGB, PixelFormat::DXT1,
110
111};
112// Missing formats:
113// PixelFormat::RGBA16I
114// PixelFormat::RG32I,
115// COMPRESSED_RGB_S3TC_DXT1_EXT
116// COMPRESSED_SRGB_S3TC_DXT1_EXT
117// COMPRESSED_RGBA_S3TC_DXT1_EXT
118// COMPRESSED_SIGNED_RED_RGTC1
119
120void Enable(FormatCompatibility::Table& compatiblity, size_t format_a, size_t format_b) {
121 compatiblity[format_a][format_b] = true;
122 compatiblity[format_b][format_a] = true;
123}
124
125void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) {
126 Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
127}
128
129template <typename Range>
130void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) {
131 for (auto it_a = range.begin(); it_a != range.end(); ++it_a) {
132 for (auto it_b = it_a; it_b != range.end(); ++it_b) {
133 Enable(compatibility, *it_a, *it_b);
134 }
135 }
136}
137
138} // Anonymous namespace
139
140FormatCompatibility::FormatCompatibility() {
141 for (size_t i = 0; i < MaxPixelFormat; ++i) {
142 // Identity is allowed
143 Enable(view, i, i);
144 }
145
146 EnableRange(view, VIEW_CLASS_128_BITS);
147 EnableRange(view, VIEW_CLASS_96_BITS);
148 EnableRange(view, VIEW_CLASS_64_BITS);
149 EnableRange(view, VIEW_CLASS_32_BITS);
150 EnableRange(view, VIEW_CLASS_16_BITS);
151 EnableRange(view, VIEW_CLASS_8_BITS);
152 EnableRange(view, VIEW_CLASS_RGTC1_RED);
153 EnableRange(view, VIEW_CLASS_RGTC2_RG);
154 EnableRange(view, VIEW_CLASS_BPTC_UNORM);
155 EnableRange(view, VIEW_CLASS_BPTC_FLOAT);
156
157 copy = view;
158 EnableRange(copy, COPY_CLASS_128_BITS);
159 EnableRange(copy, COPY_CLASS_64_BITS);
160}
161
162} // namespace VideoCore::Surface
diff --git a/src/video_core/compatible_formats.h b/src/video_core/compatible_formats.h
new file mode 100644
index 000000000..d1082566d
--- /dev/null
+++ b/src/video_core/compatible_formats.h
@@ -0,0 +1,32 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <bitset>
7#include <cstddef>
8
9#include "video_core/surface.h"
10
11namespace VideoCore::Surface {
12
13class FormatCompatibility {
14public:
15 using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>;
16
17 explicit FormatCompatibility();
18
19 bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept {
20 return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
21 }
22
23 bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept {
24 return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
25 }
26
27private:
28 Table view;
29 Table copy;
30};
31
32} // namespace VideoCore::Surface
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index ea3c8a963..c01436295 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -128,7 +128,7 @@ void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters)
128 ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); 128 ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
129 129
130 // Execute the current macro. 130 // Execute the current macro.
131 macro_engine->Execute(macro_positions[entry], parameters); 131 macro_engine->Execute(*this, macro_positions[entry], parameters);
132 if (mme_draw.current_mode != MMEDrawMode::Undefined) { 132 if (mme_draw.current_mode != MMEDrawMode::Undefined) {
133 FlushMMEInlineDraw(); 133 FlushMMEInlineDraw();
134 } 134 }
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index d5fe25065..ef1618990 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1418,6 +1418,14 @@ public:
1418 return execute_on; 1418 return execute_on;
1419 } 1419 }
1420 1420
1421 VideoCore::RasterizerInterface& GetRasterizer() {
1422 return rasterizer;
1423 }
1424
1425 const VideoCore::RasterizerInterface& GetRasterizer() const {
1426 return rasterizer;
1427 }
1428
1421 /// Notify a memory write has happened. 1429 /// Notify a memory write has happened.
1422 void OnMemoryWrite() { 1430 void OnMemoryWrite() {
1423 dirty.flags |= dirty.on_write_stores; 1431 dirty.flags |= dirty.on_write_stores;
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index e7cb87589..d374b73cf 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -661,6 +661,10 @@ union Instruction {
661 constexpr Instruction(u64 value) : value{value} {} 661 constexpr Instruction(u64 value) : value{value} {}
662 constexpr Instruction(const Instruction& instr) : value(instr.value) {} 662 constexpr Instruction(const Instruction& instr) : value(instr.value) {}
663 663
664 constexpr bool Bit(u64 offset) const {
665 return ((value >> offset) & 1) != 0;
666 }
667
664 BitField<0, 8, Register> gpr0; 668 BitField<0, 8, Register> gpr0;
665 BitField<8, 8, Register> gpr8; 669 BitField<8, 8, Register> gpr8;
666 union { 670 union {
@@ -1874,7 +1878,9 @@ public:
1874 HSETP2_C, 1878 HSETP2_C,
1875 HSETP2_R, 1879 HSETP2_R,
1876 HSETP2_IMM, 1880 HSETP2_IMM,
1881 HSET2_C,
1877 HSET2_R, 1882 HSET2_R,
1883 HSET2_IMM,
1878 POPC_C, 1884 POPC_C,
1879 POPC_R, 1885 POPC_R,
1880 POPC_IMM, 1886 POPC_IMM,
@@ -2194,7 +2200,9 @@ private:
2194 INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), 2200 INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
2195 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), 2201 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
2196 INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), 2202 INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
2203 INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"),
2197 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), 2204 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
2205 INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
2198 INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), 2206 INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
2199 INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), 2207 INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
2200 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), 2208 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 8eb017f65..482e49711 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -2,6 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <chrono>
6
5#include "common/assert.h" 7#include "common/assert.h"
6#include "common/microprofile.h" 8#include "common/microprofile.h"
7#include "core/core.h" 9#include "core/core.h"
@@ -154,8 +156,7 @@ u64 GPU::GetTicks() const {
154 constexpr u64 gpu_ticks_num = 384; 156 constexpr u64 gpu_ticks_num = 384;
155 constexpr u64 gpu_ticks_den = 625; 157 constexpr u64 gpu_ticks_den = 625;
156 158
157 const u64 cpu_ticks = system.CoreTiming().GetTicks(); 159 u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
158 u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
159 if (Settings::values.use_fast_gpu_time) { 160 if (Settings::values.use_fast_gpu_time) {
160 nanoseconds /= 256; 161 nanoseconds /= 256;
161 } 162 }
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index a1b4c305c..2c42483bd 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -284,6 +284,12 @@ public:
284 /// core timing events. 284 /// core timing events.
285 virtual void Start() = 0; 285 virtual void Start() = 0;
286 286
287 /// Obtain the CPU Context
288 virtual void ObtainContext() = 0;
289
290 /// Release the CPU Context
291 virtual void ReleaseContext() = 0;
292
287 /// Push GPU command entries to be processed 293 /// Push GPU command entries to be processed
288 virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; 294 virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
289 295
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 53305ab43..7b855f63e 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -19,10 +19,17 @@ GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBa
19GPUAsynch::~GPUAsynch() = default; 19GPUAsynch::~GPUAsynch() = default;
20 20
21void GPUAsynch::Start() { 21void GPUAsynch::Start() {
22 cpu_context->MakeCurrent();
23 gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher); 22 gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher);
24} 23}
25 24
25void GPUAsynch::ObtainContext() {
26 cpu_context->MakeCurrent();
27}
28
29void GPUAsynch::ReleaseContext() {
30 cpu_context->DoneCurrent();
31}
32
26void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { 33void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
27 gpu_thread.SubmitList(std::move(entries)); 34 gpu_thread.SubmitList(std::move(entries));
28} 35}
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 517658612..15e9f1d38 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -25,6 +25,8 @@ public:
25 ~GPUAsynch() override; 25 ~GPUAsynch() override;
26 26
27 void Start() override; 27 void Start() override;
28 void ObtainContext() override;
29 void ReleaseContext() override;
28 void PushGPUEntries(Tegra::CommandList&& entries) override; 30 void PushGPUEntries(Tegra::CommandList&& entries) override;
29 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; 31 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
30 void FlushRegion(VAddr addr, u64 size) override; 32 void FlushRegion(VAddr addr, u64 size) override;
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index 6f38a672a..aaeb9811d 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -13,10 +13,16 @@ GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase
13 13
14GPUSynch::~GPUSynch() = default; 14GPUSynch::~GPUSynch() = default;
15 15
16void GPUSynch::Start() { 16void GPUSynch::Start() {}
17
18void GPUSynch::ObtainContext() {
17 context->MakeCurrent(); 19 context->MakeCurrent();
18} 20}
19 21
22void GPUSynch::ReleaseContext() {
23 context->DoneCurrent();
24}
25
20void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { 26void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
21 dma_pusher->Push(std::move(entries)); 27 dma_pusher->Push(std::move(entries));
22 dma_pusher->DispatchCalls(); 28 dma_pusher->DispatchCalls();
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 4a6e9a01d..762c20aa5 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -24,6 +24,8 @@ public:
24 ~GPUSynch() override; 24 ~GPUSynch() override;
25 25
26 void Start() override; 26 void Start() override;
27 void ObtainContext() override;
28 void ReleaseContext() override;
27 void PushGPUEntries(Tegra::CommandList&& entries) override; 29 void PushGPUEntries(Tegra::CommandList&& entries) override;
28 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; 30 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
29 void FlushRegion(VAddr addr, u64 size) override; 31 void FlushRegion(VAddr addr, u64 size) override;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index c3bb4fe06..738c6f0c1 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -4,6 +4,7 @@
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "common/thread.h"
7#include "core/core.h" 8#include "core/core.h"
8#include "core/frontend/emu_window.h" 9#include "core/frontend/emu_window.h"
9#include "core/settings.h" 10#include "core/settings.h"
@@ -18,7 +19,11 @@ namespace VideoCommon::GPUThread {
18static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, 19static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
19 Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher, 20 Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
20 SynchState& state) { 21 SynchState& state) {
21 MicroProfileOnThreadCreate("GpuThread"); 22 std::string name = "yuzu:GPU";
23 MicroProfileOnThreadCreate(name.c_str());
24 Common::SetCurrentThreadName(name.c_str());
25 Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
26 system.RegisterHostThread();
22 27
23 // Wait for first GPU command before acquiring the window context 28 // Wait for first GPU command before acquiring the window context
24 while (state.queue.Empty()) 29 while (state.queue.Empty())
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index 89077a2d8..a50e7b4e0 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -2,32 +2,78 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <optional>
6#include <boost/container_hash/hash.hpp>
5#include "common/assert.h" 7#include "common/assert.h"
6#include "common/logging/log.h" 8#include "common/logging/log.h"
7#include "core/settings.h" 9#include "core/settings.h"
10#include "video_core/engines/maxwell_3d.h"
8#include "video_core/macro/macro.h" 11#include "video_core/macro/macro.h"
12#include "video_core/macro/macro_hle.h"
9#include "video_core/macro/macro_interpreter.h" 13#include "video_core/macro/macro_interpreter.h"
10#include "video_core/macro/macro_jit_x64.h" 14#include "video_core/macro/macro_jit_x64.h"
11 15
12namespace Tegra { 16namespace Tegra {
13 17
18MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d)
19 : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {}
20
21MacroEngine::~MacroEngine() = default;
22
14void MacroEngine::AddCode(u32 method, u32 data) { 23void MacroEngine::AddCode(u32 method, u32 data) {
15 uploaded_macro_code[method].push_back(data); 24 uploaded_macro_code[method].push_back(data);
16} 25}
17 26
18void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { 27void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
28 const std::vector<u32>& parameters) {
19 auto compiled_macro = macro_cache.find(method); 29 auto compiled_macro = macro_cache.find(method);
20 if (compiled_macro != macro_cache.end()) { 30 if (compiled_macro != macro_cache.end()) {
21 compiled_macro->second->Execute(parameters, method); 31 const auto& cache_info = compiled_macro->second;
32 if (cache_info.has_hle_program) {
33 cache_info.hle_program->Execute(parameters, method);
34 } else {
35 cache_info.lle_program->Execute(parameters, method);
36 }
22 } else { 37 } else {
23 // Macro not compiled, check if it's uploaded and if so, compile it 38 // Macro not compiled, check if it's uploaded and if so, compile it
24 auto macro_code = uploaded_macro_code.find(method); 39 std::optional<u32> mid_method = std::nullopt;
40 const auto macro_code = uploaded_macro_code.find(method);
25 if (macro_code == uploaded_macro_code.end()) { 41 if (macro_code == uploaded_macro_code.end()) {
26 UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method); 42 for (const auto& [method_base, code] : uploaded_macro_code) {
27 return; 43 if (method >= method_base && (method - method_base) < code.size()) {
44 mid_method = method_base;
45 break;
46 }
47 }
48 if (!mid_method.has_value()) {
49 UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method);
50 return;
51 }
52 }
53 auto& cache_info = macro_cache[method];
54
55 if (!mid_method.has_value()) {
56 cache_info.lle_program = Compile(macro_code->second);
57 cache_info.hash = boost::hash_value(macro_code->second);
58 } else {
59 const auto& macro_cached = uploaded_macro_code[mid_method.value()];
60 const auto rebased_method = method - mid_method.value();
61 auto& code = uploaded_macro_code[method];
62 code.resize(macro_cached.size() - rebased_method);
63 std::memcpy(code.data(), macro_cached.data() + rebased_method,
64 code.size() * sizeof(u32));
65 cache_info.hash = boost::hash_value(code);
66 cache_info.lle_program = Compile(code);
67 }
68
69 auto hle_program = hle_macros->GetHLEProgram(cache_info.hash);
70 if (hle_program.has_value()) {
71 cache_info.has_hle_program = true;
72 cache_info.hle_program = std::move(hle_program.value());
73 cache_info.hle_program->Execute(parameters, method);
74 } else {
75 cache_info.lle_program->Execute(parameters, method);
28 } 76 }
29 macro_cache[method] = Compile(macro_code->second);
30 macro_cache[method]->Execute(parameters, method);
31 } 77 }
32} 78}
33 79
diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h
index b76ed891f..4d00b84b0 100644
--- a/src/video_core/macro/macro.h
+++ b/src/video_core/macro/macro.h
@@ -11,9 +11,11 @@
11#include "common/common_types.h" 11#include "common/common_types.h"
12 12
13namespace Tegra { 13namespace Tegra {
14
14namespace Engines { 15namespace Engines {
15class Maxwell3D; 16class Maxwell3D;
16} 17}
18
17namespace Macro { 19namespace Macro {
18constexpr std::size_t NUM_MACRO_REGISTERS = 8; 20constexpr std::size_t NUM_MACRO_REGISTERS = 8;
19enum class Operation : u32 { 21enum class Operation : u32 {
@@ -94,6 +96,8 @@ union MethodAddress {
94 96
95} // namespace Macro 97} // namespace Macro
96 98
99class HLEMacro;
100
97class CachedMacro { 101class CachedMacro {
98public: 102public:
99 virtual ~CachedMacro() = default; 103 virtual ~CachedMacro() = default;
@@ -107,20 +111,29 @@ public:
107 111
108class MacroEngine { 112class MacroEngine {
109public: 113public:
110 virtual ~MacroEngine() = default; 114 explicit MacroEngine(Engines::Maxwell3D& maxwell3d);
115 virtual ~MacroEngine();
111 116
112 // Store the uploaded macro code to compile them when they're called. 117 // Store the uploaded macro code to compile them when they're called.
113 void AddCode(u32 method, u32 data); 118 void AddCode(u32 method, u32 data);
114 119
115 // Compiles the macro if its not in the cache, and executes the compiled macro 120 // Compiles the macro if its not in the cache, and executes the compiled macro
116 void Execute(u32 method, const std::vector<u32>& parameters); 121 void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters);
117 122
118protected: 123protected:
119 virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0; 124 virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0;
120 125
121private: 126private:
122 std::unordered_map<u32, std::unique_ptr<CachedMacro>> macro_cache; 127 struct CacheInfo {
128 std::unique_ptr<CachedMacro> lle_program{};
129 std::unique_ptr<CachedMacro> hle_program{};
130 u64 hash{};
131 bool has_hle_program{};
132 };
133
134 std::unordered_map<u32, CacheInfo> macro_cache;
123 std::unordered_map<u32, std::vector<u32>> uploaded_macro_code; 135 std::unordered_map<u32, std::vector<u32>> uploaded_macro_code;
136 std::unique_ptr<HLEMacro> hle_macros;
124}; 137};
125 138
126std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d); 139std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d);
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
new file mode 100644
index 000000000..410f99018
--- /dev/null
+++ b/src/video_core/macro/macro_hle.cpp
@@ -0,0 +1,113 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <vector>
7#include "video_core/engines/maxwell_3d.h"
8#include "video_core/macro/macro_hle.h"
9#include "video_core/rasterizer_interface.h"
10
11namespace Tegra {
12
13namespace {
14// HLE'd functions
15static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d,
16 const std::vector<u32>& parameters) {
17 const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B);
18
19 maxwell3d.regs.draw.topology.Assign(
20 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] &
21 ~(0x3ffffff << 26)));
22 maxwell3d.regs.vb_base_instance = parameters[5];
23 maxwell3d.mme_draw.instance_count = instance_count;
24 maxwell3d.regs.vb_element_base = parameters[3];
25 maxwell3d.regs.index_array.count = parameters[1];
26 maxwell3d.regs.index_array.first = parameters[4];
27
28 if (maxwell3d.ShouldExecute()) {
29 maxwell3d.GetRasterizer().Draw(true, true);
30 }
31 maxwell3d.regs.index_array.count = 0;
32 maxwell3d.mme_draw.instance_count = 0;
33 maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
34}
35
36static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d,
37 const std::vector<u32>& parameters) {
38 const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
39
40 maxwell3d.regs.vertex_buffer.first = parameters[3];
41 maxwell3d.regs.vertex_buffer.count = parameters[1];
42 maxwell3d.regs.vb_base_instance = parameters[4];
43 maxwell3d.regs.draw.topology.Assign(
44 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
45 maxwell3d.mme_draw.instance_count = count;
46
47 if (maxwell3d.ShouldExecute()) {
48 maxwell3d.GetRasterizer().Draw(false, true);
49 }
50 maxwell3d.regs.vertex_buffer.count = 0;
51 maxwell3d.mme_draw.instance_count = 0;
52 maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
53}
54
55static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d,
56 const std::vector<u32>& parameters) {
57 const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
58 const u32 element_base = parameters[4];
59 const u32 base_instance = parameters[5];
60 maxwell3d.regs.index_array.first = parameters[3];
61 maxwell3d.regs.reg_array[0x446] = element_base; // vertex id base?
62 maxwell3d.regs.index_array.count = parameters[1];
63 maxwell3d.regs.vb_element_base = element_base;
64 maxwell3d.regs.vb_base_instance = base_instance;
65 maxwell3d.mme_draw.instance_count = instance_count;
66 maxwell3d.CallMethodFromMME(0x8e3, 0x640);
67 maxwell3d.CallMethodFromMME(0x8e4, element_base);
68 maxwell3d.CallMethodFromMME(0x8e5, base_instance);
69 maxwell3d.regs.draw.topology.Assign(
70 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
71 if (maxwell3d.ShouldExecute()) {
72 maxwell3d.GetRasterizer().Draw(true, true);
73 }
74 maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base?
75 maxwell3d.regs.index_array.count = 0;
76 maxwell3d.regs.vb_element_base = 0x0;
77 maxwell3d.regs.vb_base_instance = 0x0;
78 maxwell3d.mme_draw.instance_count = 0;
79 maxwell3d.CallMethodFromMME(0x8e3, 0x640);
80 maxwell3d.CallMethodFromMME(0x8e4, 0x0);
81 maxwell3d.CallMethodFromMME(0x8e5, 0x0);
82 maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
83}
84} // namespace
85
86constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
87 std::make_pair<u64, HLEFunction>(0x771BB18C62444DA0, &HLE_771BB18C62444DA0),
88 std::make_pair<u64, HLEFunction>(0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD),
89 std::make_pair<u64, HLEFunction>(0x0217920100488FF7, &HLE_0217920100488FF7),
90}};
91
92HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
93HLEMacro::~HLEMacro() = default;
94
95std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const {
96 const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(),
97 [hash](const auto& pair) { return pair.first == hash; });
98 if (it == hle_funcs.end()) {
99 return std::nullopt;
100 }
101 return std::make_unique<HLEMacroImpl>(maxwell3d, it->second);
102}
103
104HLEMacroImpl::~HLEMacroImpl() = default;
105
106HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func)
107 : maxwell3d(maxwell3d), func(func) {}
108
109void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) {
110 func(maxwell3d, parameters);
111}
112
113} // namespace Tegra
diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h
new file mode 100644
index 000000000..37af875a0
--- /dev/null
+++ b/src/video_core/macro/macro_hle.h
@@ -0,0 +1,44 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <optional>
9#include <vector>
10#include "common/common_types.h"
11#include "video_core/macro/macro.h"
12
13namespace Tegra {
14
15namespace Engines {
16class Maxwell3D;
17}
18
19using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters);
20
21class HLEMacro {
22public:
23 explicit HLEMacro(Engines::Maxwell3D& maxwell3d);
24 ~HLEMacro();
25
26 std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const;
27
28private:
29 Engines::Maxwell3D& maxwell3d;
30};
31
32class HLEMacroImpl : public CachedMacro {
33public:
34 explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func);
35 ~HLEMacroImpl();
36
37 void Execute(const std::vector<u32>& parameters, u32 method) override;
38
39private:
40 Engines::Maxwell3D& maxwell3d;
41 HLEFunction func;
42};
43
44} // namespace Tegra
diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp
index 5edff27aa..aa5256419 100644
--- a/src/video_core/macro/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -11,7 +11,8 @@
11MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); 11MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
12 12
13namespace Tegra { 13namespace Tegra {
14MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} 14MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d)
15 : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
15 16
16std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { 17std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
17 return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); 18 return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index 389b58989..07292702f 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -28,7 +28,8 @@ static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
28 BRANCH_HOLDER, 28 BRANCH_HOLDER,
29}); 29});
30 30
31MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} 31MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d)
32 : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
32 33
33std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { 34std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
34 return std::make_unique<MacroJITx64Impl>(maxwell3d, code); 35 return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
@@ -553,7 +554,7 @@ Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
553} 554}
554 555
555void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { 556void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
556 auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) { 557 const auto SetRegister = [this](u32 reg, const Xbyak::Reg32& result) {
557 // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero 558 // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
558 // register. 559 // register.
559 if (reg == 0) { 560 if (reg == 0) {
@@ -561,7 +562,7 @@ void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u3
561 } 562 }
562 mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result); 563 mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result);
563 }; 564 };
564 auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); }; 565 const auto SetMethodAddress = [this](const Xbyak::Reg32& reg) { mov(METHOD_ADDRESS, reg); };
565 566
566 switch (operation) { 567 switch (operation) {
567 case Macro::ResultOperation::IgnoreAndFetch: 568 case Macro::ResultOperation::IgnoreAndFetch:
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index dbee9f634..ff5505d12 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -210,10 +210,11 @@ bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t si
210 return range == inner_size; 210 return range == inner_size;
211} 211}
212 212
213void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const { 213void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer,
214 const std::size_t size) const {
214 std::size_t remaining_size{size}; 215 std::size_t remaining_size{size};
215 std::size_t page_index{src_addr >> page_bits}; 216 std::size_t page_index{gpu_src_addr >> page_bits};
216 std::size_t page_offset{src_addr & page_mask}; 217 std::size_t page_offset{gpu_src_addr & page_mask};
217 218
218 auto& memory = system.Memory(); 219 auto& memory = system.Memory();
219 220
@@ -234,11 +235,11 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
234 } 235 }
235} 236}
236 237
237void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, 238void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
238 const std::size_t size) const { 239 const std::size_t size) const {
239 std::size_t remaining_size{size}; 240 std::size_t remaining_size{size};
240 std::size_t page_index{src_addr >> page_bits}; 241 std::size_t page_index{gpu_src_addr >> page_bits};
241 std::size_t page_offset{src_addr & page_mask}; 242 std::size_t page_offset{gpu_src_addr & page_mask};
242 243
243 auto& memory = system.Memory(); 244 auto& memory = system.Memory();
244 245
@@ -259,10 +260,11 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
259 } 260 }
260} 261}
261 262
262void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size) { 263void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer,
264 const std::size_t size) {
263 std::size_t remaining_size{size}; 265 std::size_t remaining_size{size};
264 std::size_t page_index{dest_addr >> page_bits}; 266 std::size_t page_index{gpu_dest_addr >> page_bits};
265 std::size_t page_offset{dest_addr & page_mask}; 267 std::size_t page_offset{gpu_dest_addr & page_mask};
266 268
267 auto& memory = system.Memory(); 269 auto& memory = system.Memory();
268 270
@@ -283,11 +285,11 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
283 } 285 }
284} 286}
285 287
286void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, 288void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer,
287 const std::size_t size) { 289 const std::size_t size) {
288 std::size_t remaining_size{size}; 290 std::size_t remaining_size{size};
289 std::size_t page_index{dest_addr >> page_bits}; 291 std::size_t page_index{gpu_dest_addr >> page_bits};
290 std::size_t page_offset{dest_addr & page_mask}; 292 std::size_t page_offset{gpu_dest_addr & page_mask};
291 293
292 auto& memory = system.Memory(); 294 auto& memory = system.Memory();
293 295
@@ -306,16 +308,18 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
306 } 308 }
307} 309}
308 310
309void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { 311void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
312 const std::size_t size) {
310 std::vector<u8> tmp_buffer(size); 313 std::vector<u8> tmp_buffer(size);
311 ReadBlock(src_addr, tmp_buffer.data(), size); 314 ReadBlock(gpu_src_addr, tmp_buffer.data(), size);
312 WriteBlock(dest_addr, tmp_buffer.data(), size); 315 WriteBlock(gpu_dest_addr, tmp_buffer.data(), size);
313} 316}
314 317
315void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { 318void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
319 const std::size_t size) {
316 std::vector<u8> tmp_buffer(size); 320 std::vector<u8> tmp_buffer(size);
317 ReadBlockUnsafe(src_addr, tmp_buffer.data(), size); 321 ReadBlockUnsafe(gpu_src_addr, tmp_buffer.data(), size);
318 WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); 322 WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size);
319} 323}
320 324
321bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { 325bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 0ddd52d5a..87658e87a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -79,9 +79,9 @@ public:
79 * in the Host Memory counterpart. Note: This functions cause Host GPU Memory 79 * in the Host Memory counterpart. Note: This functions cause Host GPU Memory
80 * Flushes and Invalidations, respectively to each operation. 80 * Flushes and Invalidations, respectively to each operation.
81 */ 81 */
82 void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const; 82 void ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
83 void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); 83 void WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
84 void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); 84 void CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
85 85
86 /** 86 /**
87 * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and 87 * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
@@ -93,9 +93,9 @@ public:
93 * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture 93 * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
94 * being flushed. 94 * being flushed.
95 */ 95 */
96 void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const; 96 void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
97 void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); 97 void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
98 void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); 98 void CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
99 99
100 /** 100 /**
101 * IsGranularRange checks if a gpu region can be simply read with a pointer 101 * IsGranularRange checks if a gpu region can be simply read with a pointer
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index ad0577a4f..e461e4c70 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -22,21 +22,53 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
22 22
23MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); 23MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
24 24
25Buffer::Buffer(VAddr cpu_addr, const std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { 25Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
26 : VideoCommon::BufferBlock{cpu_addr, size} {
26 gl_buffer.Create(); 27 gl_buffer.Create();
27 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); 28 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
29 if (device.HasVertexBufferUnifiedMemory()) {
30 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
31 glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
32 }
28} 33}
29 34
30Buffer::~Buffer() = default; 35Buffer::~Buffer() = default;
31 36
37void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) {
38 glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
39 data);
40}
41
42void Buffer::Download(std::size_t offset, std::size_t size, u8* data) {
43 MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
44 const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
45 const GLintptr gl_offset = static_cast<GLintptr>(offset);
46 if (read_buffer.handle == 0) {
47 read_buffer.Create();
48 glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr,
49 GL_STREAM_READ);
50 }
51 glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
52 glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size);
53 glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data);
54}
55
56void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
57 std::size_t size) {
58 glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
59 static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
60}
61
32OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, 62OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
33 const Device& device, std::size_t stream_size) 63 const Device& device_, std::size_t stream_size)
34 : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} { 64 : GenericBufferCache{rasterizer, system,
65 std::make_unique<OGLStreamBuffer>(device_, stream_size, true)},
66 device{device_} {
35 if (!device.HasFastBufferSubData()) { 67 if (!device.HasFastBufferSubData()) {
36 return; 68 return;
37 } 69 }
38 70
39 static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); 71 static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
40 glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); 72 glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
41 for (const GLuint cbuf : cbufs) { 73 for (const GLuint cbuf : cbufs) {
42 glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); 74 glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
@@ -48,39 +80,20 @@ OGLBufferCache::~OGLBufferCache() {
48} 80}
49 81
50std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { 82std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
51 return std::make_shared<Buffer>(cpu_addr, size); 83 return std::make_shared<Buffer>(device, cpu_addr, size);
52} 84}
53 85
54GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { 86OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
55 return 0; 87 return {0, 0, 0};
56}
57
58void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
59 const u8* data) {
60 glNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset),
61 static_cast<GLsizeiptr>(size), data);
62}
63
64void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
65 u8* data) {
66 MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
67 glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
68 glGetNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset),
69 static_cast<GLsizeiptr>(size), data);
70}
71
72void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
73 std::size_t dst_offset, std::size_t size) {
74 glCopyNamedBufferSubData(src.Handle(), dst.Handle(), static_cast<GLintptr>(src_offset),
75 static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
76} 88}
77 89
78OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, 90OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
79 std::size_t size) { 91 std::size_t size) {
80 DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); 92 DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
81 const GLuint cbuf = cbufs[cbuf_cursor++]; 93 const GLuint cbuf = cbufs[cbuf_cursor++];
94
82 glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); 95 glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
83 return {cbuf, 0}; 96 return {cbuf, 0, 0};
84} 97}
85 98
86} // namespace OpenGL 99} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index a49aaf9c4..88fdc0536 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -25,15 +25,28 @@ class RasterizerOpenGL;
25 25
26class Buffer : public VideoCommon::BufferBlock { 26class Buffer : public VideoCommon::BufferBlock {
27public: 27public:
28 explicit Buffer(VAddr cpu_addr, const std::size_t size); 28 explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size);
29 ~Buffer(); 29 ~Buffer();
30 30
31 GLuint Handle() const { 31 void Upload(std::size_t offset, std::size_t size, const u8* data);
32
33 void Download(std::size_t offset, std::size_t size, u8* data);
34
35 void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
36 std::size_t size);
37
38 GLuint Handle() const noexcept {
32 return gl_buffer.handle; 39 return gl_buffer.handle;
33 } 40 }
34 41
42 u64 Address() const noexcept {
43 return gpu_address;
44 }
45
35private: 46private:
36 OGLBuffer gl_buffer; 47 OGLBuffer gl_buffer;
48 OGLBuffer read_buffer;
49 u64 gpu_address = 0;
37}; 50};
38 51
39using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; 52using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
@@ -43,7 +56,7 @@ public:
43 const Device& device, std::size_t stream_size); 56 const Device& device, std::size_t stream_size);
44 ~OGLBufferCache(); 57 ~OGLBufferCache();
45 58
46 GLuint GetEmptyBuffer(std::size_t) override; 59 BufferInfo GetEmptyBuffer(std::size_t) override;
47 60
48 void Acquire() noexcept { 61 void Acquire() noexcept {
49 cbuf_cursor = 0; 62 cbuf_cursor = 0;
@@ -52,22 +65,16 @@ public:
52protected: 65protected:
53 std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; 66 std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
54 67
55 void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
56 const u8* data) override;
57
58 void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
59 u8* data) override;
60
61 void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
62 std::size_t dst_offset, std::size_t size) override;
63
64 BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; 68 BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
65 69
66private: 70private:
71 static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
72 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
73
74 const Device& device;
75
67 std::size_t cbuf_cursor = 0; 76 std::size_t cbuf_cursor = 0;
68 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * 77 std::array<GLuint, NUM_CBUFS> cbufs{};
69 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram>
70 cbufs;
71}; 78};
72 79
73} // namespace OpenGL 80} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index b31d604e4..208fc6167 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -178,7 +178,7 @@ bool IsASTCSupported() {
178 for (const GLenum format : formats) { 178 for (const GLenum format : formats) {
179 for (const GLenum support : required_support) { 179 for (const GLenum support : required_support) {
180 GLint value; 180 GLint value;
181 glGetInternalformativ(GL_TEXTURE_2D, format, support, 1, &value); 181 glGetInternalformativ(target, format, support, 1, &value);
182 if (value != GL_FULL_SUPPORT) { 182 if (value != GL_FULL_SUPPORT) {
183 return false; 183 return false;
184 } 184 }
@@ -193,6 +193,7 @@ bool IsASTCSupported() {
193Device::Device() 193Device::Device()
194 : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { 194 : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
195 const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); 195 const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
196 const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
196 const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); 197 const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
197 const std::vector extensions = GetExtensions(); 198 const std::vector extensions = GetExtensions();
198 199
@@ -216,12 +217,18 @@ Device::Device()
216 has_shader_ballot = GLAD_GL_ARB_shader_ballot; 217 has_shader_ballot = GLAD_GL_ARB_shader_ballot;
217 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; 218 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
218 has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); 219 has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
220 has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod");
219 has_astc = IsASTCSupported(); 221 has_astc = IsASTCSupported();
220 has_variable_aoffi = TestVariableAoffi(); 222 has_variable_aoffi = TestVariableAoffi();
221 has_component_indexing_bug = is_amd; 223 has_component_indexing_bug = is_amd;
222 has_precise_bug = TestPreciseBug(); 224 has_precise_bug = TestPreciseBug();
223 has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
224 has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; 225 has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
226 has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
227
228 // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
229 // uniform buffers as "push constants"
230 has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
231
225 use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && 232 use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
226 GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && 233 GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback &&
227 GLAD_GL_NV_transform_feedback2; 234 GLAD_GL_NV_transform_feedback2;
@@ -245,6 +252,7 @@ Device::Device(std::nullptr_t) {
245 has_shader_ballot = true; 252 has_shader_ballot = true;
246 has_vertex_viewport_layer = true; 253 has_vertex_viewport_layer = true;
247 has_image_load_formatted = true; 254 has_image_load_formatted = true;
255 has_texture_shadow_lod = true;
248 has_variable_aoffi = true; 256 has_variable_aoffi = true;
249} 257}
250 258
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 145347943..e1d811966 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -68,6 +68,14 @@ public:
68 return has_image_load_formatted; 68 return has_image_load_formatted;
69 } 69 }
70 70
71 bool HasTextureShadowLod() const {
72 return has_texture_shadow_lod;
73 }
74
75 bool HasVertexBufferUnifiedMemory() const {
76 return has_vertex_buffer_unified_memory;
77 }
78
71 bool HasASTC() const { 79 bool HasASTC() const {
72 return has_astc; 80 return has_astc;
73 } 81 }
@@ -110,6 +118,8 @@ private:
110 bool has_shader_ballot{}; 118 bool has_shader_ballot{};
111 bool has_vertex_viewport_layer{}; 119 bool has_vertex_viewport_layer{};
112 bool has_image_load_formatted{}; 120 bool has_image_load_formatted{};
121 bool has_texture_shadow_lod{};
122 bool has_vertex_buffer_unified_memory{};
113 bool has_astc{}; 123 bool has_astc{};
114 bool has_variable_aoffi{}; 124 bool has_variable_aoffi{};
115 bool has_component_indexing_bug{}; 125 bool has_component_indexing_bug{};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 2d6c11320..e960a0ef1 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -61,7 +61,8 @@ constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
61constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = 61constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
62 NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; 62 NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
63 63
64constexpr std::size_t NumSupportedVertexAttributes = 16; 64constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
65constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
65 66
66template <typename Engine, typename Entry> 67template <typename Engine, typename Entry>
67Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, 68Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
@@ -193,7 +194,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
193 // avoid OpenGL errors. 194 // avoid OpenGL errors.
194 // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't 195 // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
195 // assume every shader uses them all. 196 // assume every shader uses them all.
196 for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { 197 for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
197 if (!flags[Dirty::VertexFormat0 + index]) { 198 if (!flags[Dirty::VertexFormat0 + index]) {
198 continue; 199 continue;
199 } 200 }
@@ -212,9 +213,10 @@ void RasterizerOpenGL::SetupVertexFormat() {
212 if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt || 213 if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt ||
213 attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) { 214 attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) {
214 glVertexAttribIFormat(gl_index, attrib.ComponentCount(), 215 glVertexAttribIFormat(gl_index, attrib.ComponentCount(),
215 MaxwellToGL::VertexType(attrib), attrib.offset); 216 MaxwellToGL::VertexFormat(attrib), attrib.offset);
216 } else { 217 } else {
217 glVertexAttribFormat(gl_index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), 218 glVertexAttribFormat(gl_index, attrib.ComponentCount(),
219 MaxwellToGL::VertexFormat(attrib),
218 attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); 220 attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
219 } 221 }
220 glVertexAttribBinding(gl_index, attrib.buffer); 222 glVertexAttribBinding(gl_index, attrib.buffer);
@@ -231,9 +233,11 @@ void RasterizerOpenGL::SetupVertexBuffer() {
231 233
232 MICROPROFILE_SCOPE(OpenGL_VB); 234 MICROPROFILE_SCOPE(OpenGL_VB);
233 235
236 const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
237
234 // Upload all guest vertex arrays sequentially to our buffer 238 // Upload all guest vertex arrays sequentially to our buffer
235 const auto& regs = gpu.regs; 239 const auto& regs = gpu.regs;
236 for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { 240 for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
237 if (!flags[Dirty::VertexBuffer0 + index]) { 241 if (!flags[Dirty::VertexBuffer0 + index]) {
238 continue; 242 continue;
239 } 243 }
@@ -246,16 +250,25 @@ void RasterizerOpenGL::SetupVertexBuffer() {
246 250
247 const GPUVAddr start = vertex_array.StartAddress(); 251 const GPUVAddr start = vertex_array.StartAddress();
248 const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); 252 const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
249
250 ASSERT(end >= start); 253 ASSERT(end >= start);
254
255 const GLuint gl_index = static_cast<GLuint>(index);
251 const u64 size = end - start; 256 const u64 size = end - start;
252 if (size == 0) { 257 if (size == 0) {
253 glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride); 258 glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
259 if (use_unified_memory) {
260 glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
261 }
254 continue; 262 continue;
255 } 263 }
256 const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); 264 const auto info = buffer_cache.UploadMemory(start, size);
257 glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset, 265 if (use_unified_memory) {
258 vertex_array.stride); 266 glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
267 glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
268 info.address + info.offset, size);
269 } else {
270 glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
271 }
259 } 272 }
260} 273}
261 274
@@ -268,7 +281,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
268 flags[Dirty::VertexInstances] = false; 281 flags[Dirty::VertexInstances] = false;
269 282
270 const auto& regs = gpu.regs; 283 const auto& regs = gpu.regs;
271 for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { 284 for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
272 if (!flags[Dirty::VertexInstance0 + index]) { 285 if (!flags[Dirty::VertexInstance0 + index]) {
273 continue; 286 continue;
274 } 287 }
@@ -285,9 +298,9 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
285 MICROPROFILE_SCOPE(OpenGL_Index); 298 MICROPROFILE_SCOPE(OpenGL_Index);
286 const auto& regs = system.GPU().Maxwell3D().regs; 299 const auto& regs = system.GPU().Maxwell3D().regs;
287 const std::size_t size = CalculateIndexBufferSize(); 300 const std::size_t size = CalculateIndexBufferSize();
288 const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); 301 const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
289 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer); 302 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
290 return offset; 303 return info.offset;
291} 304}
292 305
293void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { 306void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
@@ -643,9 +656,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
643 if (!device.UseAssemblyShaders()) { 656 if (!device.UseAssemblyShaders()) {
644 MaxwellUniformData ubo; 657 MaxwellUniformData ubo;
645 ubo.SetFromRegs(gpu); 658 ubo.SetFromRegs(gpu);
646 const auto [buffer, offset] = 659 const auto info =
647 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); 660 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
648 glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, 661 glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
649 static_cast<GLsizeiptr>(sizeof(ubo))); 662 static_cast<GLsizeiptr>(sizeof(ubo)));
650 } 663 }
651 664
@@ -956,8 +969,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
956 if (device.UseAssemblyShaders()) { 969 if (device.UseAssemblyShaders()) {
957 glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); 970 glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
958 } else { 971 } else {
959 glBindBufferRange(GL_UNIFORM_BUFFER, binding, 972 glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
960 buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
961 } 973 }
962 return; 974 return;
963 } 975 }
@@ -970,24 +982,25 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
970 982
971 const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); 983 const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
972 const GPUVAddr gpu_addr = buffer.address; 984 const GPUVAddr gpu_addr = buffer.address;
973 auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); 985 auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
974 986
975 if (device.UseAssemblyShaders()) { 987 if (device.UseAssemblyShaders()) {
976 UNIMPLEMENTED_IF(use_unified); 988 UNIMPLEMENTED_IF(use_unified);
977 if (offset != 0) { 989 if (info.offset != 0) {
978 const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; 990 const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
979 glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); 991 glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
980 cbuf = staging_cbuf; 992 info.handle = staging_cbuf;
981 offset = 0; 993 info.offset = 0;
982 } 994 }
983 glBindBufferRangeNV(stage, binding, cbuf, offset, size); 995 glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
984 return; 996 return;
985 } 997 }
986 998
987 if (use_unified) { 999 if (use_unified) {
988 glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size); 1000 glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
1001 unified_offset, size);
989 } else { 1002 } else {
990 glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); 1003 glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
991 } 1004 }
992} 1005}
993 1006
@@ -1023,9 +1036,8 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
1023void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, 1036void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
1024 GPUVAddr gpu_addr, std::size_t size) { 1037 GPUVAddr gpu_addr, std::size_t size) {
1025 const auto alignment{device.GetShaderStorageBufferAlignment()}; 1038 const auto alignment{device.GetShaderStorageBufferAlignment()};
1026 const auto [ssbo, buffer_offset] = 1039 const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
1027 buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); 1040 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
1028 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset,
1029 static_cast<GLsizeiptr>(size)); 1041 static_cast<GLsizeiptr>(size));
1030} 1042}
1031 1043
@@ -1712,8 +1724,9 @@ void RasterizerOpenGL::EndTransformFeedback() {
1712 const GLuint handle = transform_feedback_buffers[index].handle; 1724 const GLuint handle = transform_feedback_buffers[index].handle;
1713 const GPUVAddr gpu_addr = binding.Address(); 1725 const GPUVAddr gpu_addr = binding.Address();
1714 const std::size_t size = binding.buffer_size; 1726 const std::size_t size = binding.buffer_size;
1715 const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); 1727 const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
1716 glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); 1728 glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
1729 static_cast<GLsizeiptr>(size));
1717 } 1730 }
1718} 1731}
1719 1732
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 46e780a06..c6a3bf3a1 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -460,8 +460,9 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
460 const u8* host_ptr_b = memory_manager.GetPointer(address_b); 460 const u8* host_ptr_b = memory_manager.GetPointer(address_b);
461 code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false); 461 code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false);
462 } 462 }
463 const std::size_t code_size = code.size() * sizeof(u64);
463 464
464 const auto unique_identifier = GetUniqueIdentifier( 465 const u64 unique_identifier = GetUniqueIdentifier(
465 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); 466 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
466 467
467 const ShaderParameters params{system, disk_cache, device, 468 const ShaderParameters params{system, disk_cache, device,
@@ -477,7 +478,7 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
477 478
478 Shader* const result = shader.get(); 479 Shader* const result = shader.get();
479 if (cpu_addr) { 480 if (cpu_addr) {
480 Register(std::move(shader), *cpu_addr, code.size() * sizeof(u64)); 481 Register(std::move(shader), *cpu_addr, code_size);
481 } else { 482 } else {
482 null_shader = std::move(shader); 483 null_shader = std::move(shader);
483 } 484 }
@@ -495,8 +496,9 @@ Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
495 496
496 const auto host_ptr{memory_manager.GetPointer(code_addr)}; 497 const auto host_ptr{memory_manager.GetPointer(code_addr)};
497 // No kernel found, create a new one 498 // No kernel found, create a new one
498 auto code{GetShaderCode(memory_manager, code_addr, host_ptr, true)}; 499 ProgramCode code{GetShaderCode(memory_manager, code_addr, host_ptr, true)};
499 const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; 500 const std::size_t code_size{code.size() * sizeof(u64)};
501 const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
500 502
501 const ShaderParameters params{system, disk_cache, device, 503 const ShaderParameters params{system, disk_cache, device,
502 *cpu_addr, host_ptr, unique_identifier}; 504 *cpu_addr, host_ptr, unique_identifier};
@@ -511,7 +513,7 @@ Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
511 513
512 Shader* const result = kernel.get(); 514 Shader* const result = kernel.get();
513 if (cpu_addr) { 515 if (cpu_addr) {
514 Register(std::move(kernel), *cpu_addr, code.size() * sizeof(u64)); 516 Register(std::move(kernel), *cpu_addr, code_size);
515 } else { 517 } else {
516 null_kernel = std::move(kernel); 518 null_kernel = std::move(kernel);
517 } 519 }
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 6848f1388..994aaeaf2 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -37,7 +37,6 @@ namespace OpenGL {
37 37
38class Device; 38class Device;
39class RasterizerOpenGL; 39class RasterizerOpenGL;
40struct UnspecializedShader;
41 40
42using Maxwell = Tegra::Engines::Maxwell3D::Regs; 41using Maxwell = Tegra::Engines::Maxwell3D::Regs;
43 42
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index d6e30b321..2c49aeaac 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -37,6 +37,7 @@ using Tegra::Shader::IpaMode;
37using Tegra::Shader::IpaSampleMode; 37using Tegra::Shader::IpaSampleMode;
38using Tegra::Shader::PixelImap; 38using Tegra::Shader::PixelImap;
39using Tegra::Shader::Register; 39using Tegra::Shader::Register;
40using Tegra::Shader::TextureType;
40using VideoCommon::Shader::BuildTransformFeedback; 41using VideoCommon::Shader::BuildTransformFeedback;
41using VideoCommon::Shader::Registry; 42using VideoCommon::Shader::Registry;
42 43
@@ -526,6 +527,9 @@ private:
526 if (device.HasImageLoadFormatted()) { 527 if (device.HasImageLoadFormatted()) {
527 code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); 528 code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
528 } 529 }
530 if (device.HasTextureShadowLod()) {
531 code.AddLine("#extension GL_EXT_texture_shadow_lod : require");
532 }
529 if (device.HasWarpIntrinsics()) { 533 if (device.HasWarpIntrinsics()) {
530 code.AddLine("#extension GL_NV_gpu_shader5 : require"); 534 code.AddLine("#extension GL_NV_gpu_shader5 : require");
531 code.AddLine("#extension GL_NV_shader_thread_group : require"); 535 code.AddLine("#extension GL_NV_shader_thread_group : require");
@@ -909,13 +913,13 @@ private:
909 return "samplerBuffer"; 913 return "samplerBuffer";
910 } 914 }
911 switch (sampler.type) { 915 switch (sampler.type) {
912 case Tegra::Shader::TextureType::Texture1D: 916 case TextureType::Texture1D:
913 return "sampler1D"; 917 return "sampler1D";
914 case Tegra::Shader::TextureType::Texture2D: 918 case TextureType::Texture2D:
915 return "sampler2D"; 919 return "sampler2D";
916 case Tegra::Shader::TextureType::Texture3D: 920 case TextureType::Texture3D:
917 return "sampler3D"; 921 return "sampler3D";
918 case Tegra::Shader::TextureType::TextureCube: 922 case TextureType::TextureCube:
919 return "samplerCube"; 923 return "samplerCube";
920 default: 924 default:
921 UNREACHABLE(); 925 UNREACHABLE();
@@ -1380,8 +1384,19 @@ private:
1380 const std::size_t count = operation.GetOperandsCount(); 1384 const std::size_t count = operation.GetOperandsCount();
1381 const bool has_array = meta->sampler.is_array; 1385 const bool has_array = meta->sampler.is_array;
1382 const bool has_shadow = meta->sampler.is_shadow; 1386 const bool has_shadow = meta->sampler.is_shadow;
1387 const bool workaround_lod_array_shadow_as_grad =
1388 !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow &&
1389 ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
1390 meta->sampler.type == TextureType::TextureCube);
1391
1392 std::string expr = "texture";
1393
1394 if (workaround_lod_array_shadow_as_grad) {
1395 expr += "Grad";
1396 } else {
1397 expr += function_suffix;
1398 }
1383 1399
1384 std::string expr = "texture" + function_suffix;
1385 if (!meta->aoffi.empty()) { 1400 if (!meta->aoffi.empty()) {
1386 expr += "Offset"; 1401 expr += "Offset";
1387 } else if (!meta->ptp.empty()) { 1402 } else if (!meta->ptp.empty()) {
@@ -1415,6 +1430,16 @@ private:
1415 expr += ')'; 1430 expr += ')';
1416 } 1431 }
1417 1432
1433 if (workaround_lod_array_shadow_as_grad) {
1434 switch (meta->sampler.type) {
1435 case TextureType::Texture2D:
1436 return expr + ", vec2(0.0), vec2(0.0))";
1437 case TextureType::TextureCube:
1438 return expr + ", vec3(0.0), vec3(0.0))";
1439 }
1440 UNREACHABLE();
1441 }
1442
1418 for (const auto& variant : extras) { 1443 for (const auto& variant : extras) {
1419 if (const auto argument = std::get_if<TextureArgument>(&variant)) { 1444 if (const auto argument = std::get_if<TextureArgument>(&variant)) {
1420 expr += GenerateTextureArgument(*argument); 1445 expr += GenerateTextureArgument(*argument);
@@ -2041,8 +2066,19 @@ private:
2041 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 2066 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
2042 ASSERT(meta); 2067 ASSERT(meta);
2043 2068
2044 std::string expr = GenerateTexture( 2069 std::string expr{};
2045 operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); 2070
2071 if (!device.HasTextureShadowLod() && meta->sampler.is_shadow &&
2072 ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
2073 meta->sampler.type == TextureType::TextureCube)) {
2074 LOG_ERROR(Render_OpenGL,
2075 "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround");
2076 expr = GenerateTexture(operation, "Lod", {});
2077 } else {
2078 expr = GenerateTexture(operation, "Lod",
2079 {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
2080 }
2081
2046 if (meta->sampler.is_shadow) { 2082 if (meta->sampler.is_shadow) {
2047 expr = "vec4(" + expr + ')'; 2083 expr = "vec4(" + expr + ')';
2048 } 2084 }
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 932a2f69e..3655ff629 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -2,11 +2,13 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <deque> 5#include <tuple>
6#include <vector> 6#include <vector>
7
7#include "common/alignment.h" 8#include "common/alignment.h"
8#include "common/assert.h" 9#include "common/assert.h"
9#include "common/microprofile.h" 10#include "common/microprofile.h"
11#include "video_core/renderer_opengl/gl_device.h"
10#include "video_core/renderer_opengl/gl_stream_buffer.h" 12#include "video_core/renderer_opengl/gl_stream_buffer.h"
11 13
12MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", 14MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
@@ -14,8 +16,7 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
14 16
15namespace OpenGL { 17namespace OpenGL {
16 18
17OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent, 19OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage)
18 bool use_persistent)
19 : buffer_size(size) { 20 : buffer_size(size) {
20 gl_buffer.Create(); 21 gl_buffer.Create();
21 22
@@ -29,23 +30,19 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p
29 allocate_size *= 2; 30 allocate_size *= 2;
30 } 31 }
31 32
32 if (use_persistent) { 33 static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
33 persistent = true; 34 glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
34 coherent = prefer_coherent; 35 mapped_ptr = static_cast<u8*>(
35 const GLbitfield flags = 36 glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
36 GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); 37
37 glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); 38 if (device.HasVertexBufferUnifiedMemory()) {
38 mapped_ptr = static_cast<u8*>(glMapNamedBufferRange( 39 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
39 gl_buffer.handle, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); 40 glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
40 } else {
41 glNamedBufferData(gl_buffer.handle, allocate_size, nullptr, GL_STREAM_DRAW);
42 } 41 }
43} 42}
44 43
45OGLStreamBuffer::~OGLStreamBuffer() { 44OGLStreamBuffer::~OGLStreamBuffer() {
46 if (persistent) { 45 glUnmapNamedBuffer(gl_buffer.handle);
47 glUnmapNamedBuffer(gl_buffer.handle);
48 }
49 gl_buffer.Release(); 46 gl_buffer.Release();
50} 47}
51 48
@@ -60,36 +57,21 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
60 57
61 bool invalidate = false; 58 bool invalidate = false;
62 if (buffer_pos + size > buffer_size) { 59 if (buffer_pos + size > buffer_size) {
60 MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
61 glInvalidateBufferData(gl_buffer.handle);
62
63 buffer_pos = 0; 63 buffer_pos = 0;
64 invalidate = true; 64 invalidate = true;
65
66 if (persistent) {
67 glUnmapNamedBuffer(gl_buffer.handle);
68 }
69 } 65 }
70 66
71 if (invalidate || !persistent) { 67 return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate);
72 MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
73 GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
74 (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
75 (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
76 mapped_ptr = static_cast<u8*>(
77 glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags));
78 mapped_offset = buffer_pos;
79 }
80
81 return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
82} 68}
83 69
84void OGLStreamBuffer::Unmap(GLsizeiptr size) { 70void OGLStreamBuffer::Unmap(GLsizeiptr size) {
85 ASSERT(size <= mapped_size); 71 ASSERT(size <= mapped_size);
86 72
87 if (!coherent && size > 0) { 73 if (size > 0) {
88 glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size); 74 glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
89 }
90
91 if (!persistent) {
92 glUnmapNamedBuffer(gl_buffer.handle);
93 } 75 }
94 76
95 buffer_pos += size; 77 buffer_pos += size;
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index 866da3594..307a67113 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -11,10 +11,11 @@
11 11
12namespace OpenGL { 12namespace OpenGL {
13 13
14class Device;
15
14class OGLStreamBuffer : private NonCopyable { 16class OGLStreamBuffer : private NonCopyable {
15public: 17public:
16 explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false, 18 explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage);
17 bool use_persistent = true);
18 ~OGLStreamBuffer(); 19 ~OGLStreamBuffer();
19 20
20 /* 21 /*
@@ -33,19 +34,20 @@ public:
33 return gl_buffer.handle; 34 return gl_buffer.handle;
34 } 35 }
35 36
36 GLsizeiptr Size() const { 37 u64 Address() const {
38 return gpu_address;
39 }
40
41 GLsizeiptr Size() const noexcept {
37 return buffer_size; 42 return buffer_size;
38 } 43 }
39 44
40private: 45private:
41 OGLBuffer gl_buffer; 46 OGLBuffer gl_buffer;
42 47
43 bool coherent = false; 48 GLuint64EXT gpu_address = 0;
44 bool persistent = false;
45
46 GLintptr buffer_pos = 0; 49 GLintptr buffer_pos = 0;
47 GLsizeiptr buffer_size = 0; 50 GLsizeiptr buffer_size = 0;
48 GLintptr mapped_offset = 0;
49 GLsizeiptr mapped_size = 0; 51 GLsizeiptr mapped_size = 0;
50 u8* mapped_ptr = nullptr; 52 u8* mapped_ptr = nullptr;
51}; 53};
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 35e329240..fe9bd4b5a 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -24,10 +24,11 @@ namespace MaxwellToGL {
24 24
25using Maxwell = Tegra::Engines::Maxwell3D::Regs; 25using Maxwell = Tegra::Engines::Maxwell3D::Regs;
26 26
27inline GLenum VertexType(Maxwell::VertexAttribute attrib) { 27inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
28 switch (attrib.type) { 28 switch (attrib.type) {
29 case Maxwell::VertexAttribute::Type::UnsignedInt:
30 case Maxwell::VertexAttribute::Type::UnsignedNorm: 29 case Maxwell::VertexAttribute::Type::UnsignedNorm:
30 case Maxwell::VertexAttribute::Type::UnsignedScaled:
31 case Maxwell::VertexAttribute::Type::UnsignedInt:
31 switch (attrib.size) { 32 switch (attrib.size) {
32 case Maxwell::VertexAttribute::Size::Size_8: 33 case Maxwell::VertexAttribute::Size::Size_8:
33 case Maxwell::VertexAttribute::Size::Size_8_8: 34 case Maxwell::VertexAttribute::Size::Size_8_8:
@@ -48,8 +49,9 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
48 return GL_UNSIGNED_INT_2_10_10_10_REV; 49 return GL_UNSIGNED_INT_2_10_10_10_REV;
49 } 50 }
50 break; 51 break;
51 case Maxwell::VertexAttribute::Type::SignedInt:
52 case Maxwell::VertexAttribute::Type::SignedNorm: 52 case Maxwell::VertexAttribute::Type::SignedNorm:
53 case Maxwell::VertexAttribute::Type::SignedScaled:
54 case Maxwell::VertexAttribute::Type::SignedInt:
53 switch (attrib.size) { 55 switch (attrib.size) {
54 case Maxwell::VertexAttribute::Size::Size_8: 56 case Maxwell::VertexAttribute::Size::Size_8:
55 case Maxwell::VertexAttribute::Size::Size_8_8: 57 case Maxwell::VertexAttribute::Size::Size_8_8:
@@ -84,36 +86,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
84 return GL_FLOAT; 86 return GL_FLOAT;
85 } 87 }
86 break; 88 break;
87 case Maxwell::VertexAttribute::Type::UnsignedScaled:
88 switch (attrib.size) {
89 case Maxwell::VertexAttribute::Size::Size_8:
90 case Maxwell::VertexAttribute::Size::Size_8_8:
91 case Maxwell::VertexAttribute::Size::Size_8_8_8:
92 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
93 return GL_UNSIGNED_BYTE;
94 case Maxwell::VertexAttribute::Size::Size_16:
95 case Maxwell::VertexAttribute::Size::Size_16_16:
96 case Maxwell::VertexAttribute::Size::Size_16_16_16:
97 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
98 return GL_UNSIGNED_SHORT;
99 }
100 break;
101 case Maxwell::VertexAttribute::Type::SignedScaled:
102 switch (attrib.size) {
103 case Maxwell::VertexAttribute::Size::Size_8:
104 case Maxwell::VertexAttribute::Size::Size_8_8:
105 case Maxwell::VertexAttribute::Size::Size_8_8_8:
106 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
107 return GL_BYTE;
108 case Maxwell::VertexAttribute::Size::Size_16:
109 case Maxwell::VertexAttribute::Size::Size_16_16:
110 case Maxwell::VertexAttribute::Size::Size_16_16_16:
111 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
112 return GL_SHORT;
113 }
114 break;
115 } 89 }
116 UNIMPLEMENTED_MSG("Unimplemented vertex type={} and size={}", attrib.TypeString(), 90 UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", attrib.TypeString(),
117 attrib.SizeString()); 91 attrib.SizeString());
118 return {}; 92 return {};
119} 93}
@@ -217,6 +191,12 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
217 } else { 191 } else {
218 return GL_MIRROR_CLAMP_TO_EDGE; 192 return GL_MIRROR_CLAMP_TO_EDGE;
219 } 193 }
194 case Tegra::Texture::WrapMode::MirrorOnceClampOGL:
195 if (GL_EXT_texture_mirror_clamp) {
196 return GL_MIRROR_CLAMP_EXT;
197 } else {
198 return GL_MIRROR_CLAMP_TO_EDGE;
199 }
220 } 200 }
221 UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode)); 201 UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
222 return GL_REPEAT; 202 return GL_REPEAT;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 6214fcbc3..c40adb6e7 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -488,6 +488,15 @@ void RendererOpenGL::InitOpenGLObjects() {
488 488
489 // Clear screen to black 489 // Clear screen to black
490 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); 490 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
491
492 // Enable unified vertex attributes and query vertex buffer address when the driver supports it
493 if (device.HasVertexBufferUnifiedMemory()) {
494 glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
495
496 glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
497 glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
498 &vertex_buffer_address);
499 }
491} 500}
492 501
493void RendererOpenGL::AddTelemetryFields() { 502void RendererOpenGL::AddTelemetryFields() {
@@ -656,7 +665,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
656 offsetof(ScreenRectVertex, tex_coord)); 665 offsetof(ScreenRectVertex, tex_coord));
657 glVertexAttribBinding(PositionLocation, 0); 666 glVertexAttribBinding(PositionLocation, 0);
658 glVertexAttribBinding(TexCoordLocation, 0); 667 glVertexAttribBinding(TexCoordLocation, 0);
659 glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); 668 if (device.HasVertexBufferUnifiedMemory()) {
669 glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex));
670 glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address,
671 sizeof(vertices));
672 } else {
673 glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
674 }
660 675
661 glBindTextureUnit(0, screen_info.display_texture); 676 glBindTextureUnit(0, screen_info.display_texture);
662 glBindSampler(0, 0); 677 glBindSampler(0, 0);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 61bf507f4..8b18d32e6 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -107,6 +107,9 @@ private:
107 OGLPipeline pipeline; 107 OGLPipeline pipeline;
108 OGLFramebuffer screenshot_framebuffer; 108 OGLFramebuffer screenshot_framebuffer;
109 109
110 // GPU address of the vertex buffer
111 GLuint64EXT vertex_buffer_address = 0;
112
110 /// Display information for Switch screen 113 /// Display information for Switch screen
111 ScreenInfo screen_info; 114 ScreenInfo screen_info;
112 115
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 1f2b6734b..d7f1ae89f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -294,6 +294,28 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
294 294
295VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { 295VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
296 switch (type) { 296 switch (type) {
297 case Maxwell::VertexAttribute::Type::UnsignedNorm:
298 switch (size) {
299 case Maxwell::VertexAttribute::Size::Size_8:
300 return VK_FORMAT_R8_UNORM;
301 case Maxwell::VertexAttribute::Size::Size_8_8:
302 return VK_FORMAT_R8G8_UNORM;
303 case Maxwell::VertexAttribute::Size::Size_8_8_8:
304 return VK_FORMAT_R8G8B8_UNORM;
305 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
306 return VK_FORMAT_R8G8B8A8_UNORM;
307 case Maxwell::VertexAttribute::Size::Size_16:
308 return VK_FORMAT_R16_UNORM;
309 case Maxwell::VertexAttribute::Size::Size_16_16:
310 return VK_FORMAT_R16G16_UNORM;
311 case Maxwell::VertexAttribute::Size::Size_16_16_16:
312 return VK_FORMAT_R16G16B16_UNORM;
313 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
314 return VK_FORMAT_R16G16B16A16_UNORM;
315 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
316 return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
317 }
318 break;
297 case Maxwell::VertexAttribute::Type::SignedNorm: 319 case Maxwell::VertexAttribute::Type::SignedNorm:
298 switch (size) { 320 switch (size) {
299 case Maxwell::VertexAttribute::Size::Size_8: 321 case Maxwell::VertexAttribute::Size::Size_8:
@@ -314,62 +336,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
314 return VK_FORMAT_R16G16B16A16_SNORM; 336 return VK_FORMAT_R16G16B16A16_SNORM;
315 case Maxwell::VertexAttribute::Size::Size_10_10_10_2: 337 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
316 return VK_FORMAT_A2B10G10R10_SNORM_PACK32; 338 return VK_FORMAT_A2B10G10R10_SNORM_PACK32;
317 default:
318 break;
319 } 339 }
320 break; 340 break;
321 case Maxwell::VertexAttribute::Type::UnsignedNorm: 341 case Maxwell::VertexAttribute::Type::UnsignedScaled:
322 switch (size) { 342 switch (size) {
323 case Maxwell::VertexAttribute::Size::Size_8: 343 case Maxwell::VertexAttribute::Size::Size_8:
324 return VK_FORMAT_R8_UNORM; 344 return VK_FORMAT_R8_USCALED;
325 case Maxwell::VertexAttribute::Size::Size_8_8: 345 case Maxwell::VertexAttribute::Size::Size_8_8:
326 return VK_FORMAT_R8G8_UNORM; 346 return VK_FORMAT_R8G8_USCALED;
327 case Maxwell::VertexAttribute::Size::Size_8_8_8: 347 case Maxwell::VertexAttribute::Size::Size_8_8_8:
328 return VK_FORMAT_R8G8B8_UNORM; 348 return VK_FORMAT_R8G8B8_USCALED;
329 case Maxwell::VertexAttribute::Size::Size_8_8_8_8: 349 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
330 return VK_FORMAT_R8G8B8A8_UNORM; 350 return VK_FORMAT_R8G8B8A8_USCALED;
331 case Maxwell::VertexAttribute::Size::Size_16: 351 case Maxwell::VertexAttribute::Size::Size_16:
332 return VK_FORMAT_R16_UNORM; 352 return VK_FORMAT_R16_USCALED;
333 case Maxwell::VertexAttribute::Size::Size_16_16: 353 case Maxwell::VertexAttribute::Size::Size_16_16:
334 return VK_FORMAT_R16G16_UNORM; 354 return VK_FORMAT_R16G16_USCALED;
335 case Maxwell::VertexAttribute::Size::Size_16_16_16: 355 case Maxwell::VertexAttribute::Size::Size_16_16_16:
336 return VK_FORMAT_R16G16B16_UNORM; 356 return VK_FORMAT_R16G16B16_USCALED;
337 case Maxwell::VertexAttribute::Size::Size_16_16_16_16: 357 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
338 return VK_FORMAT_R16G16B16A16_UNORM; 358 return VK_FORMAT_R16G16B16A16_USCALED;
339 case Maxwell::VertexAttribute::Size::Size_10_10_10_2: 359 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
340 return VK_FORMAT_A2B10G10R10_UNORM_PACK32; 360 return VK_FORMAT_A2B10G10R10_USCALED_PACK32;
341 default:
342 break;
343 } 361 }
344 break; 362 break;
345 case Maxwell::VertexAttribute::Type::SignedInt: 363 case Maxwell::VertexAttribute::Type::SignedScaled:
346 switch (size) { 364 switch (size) {
347 case Maxwell::VertexAttribute::Size::Size_8: 365 case Maxwell::VertexAttribute::Size::Size_8:
348 return VK_FORMAT_R8_SINT; 366 return VK_FORMAT_R8_SSCALED;
349 case Maxwell::VertexAttribute::Size::Size_8_8: 367 case Maxwell::VertexAttribute::Size::Size_8_8:
350 return VK_FORMAT_R8G8_SINT; 368 return VK_FORMAT_R8G8_SSCALED;
351 case Maxwell::VertexAttribute::Size::Size_8_8_8: 369 case Maxwell::VertexAttribute::Size::Size_8_8_8:
352 return VK_FORMAT_R8G8B8_SINT; 370 return VK_FORMAT_R8G8B8_SSCALED;
353 case Maxwell::VertexAttribute::Size::Size_8_8_8_8: 371 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
354 return VK_FORMAT_R8G8B8A8_SINT; 372 return VK_FORMAT_R8G8B8A8_SSCALED;
355 case Maxwell::VertexAttribute::Size::Size_16: 373 case Maxwell::VertexAttribute::Size::Size_16:
356 return VK_FORMAT_R16_SINT; 374 return VK_FORMAT_R16_SSCALED;
357 case Maxwell::VertexAttribute::Size::Size_16_16: 375 case Maxwell::VertexAttribute::Size::Size_16_16:
358 return VK_FORMAT_R16G16_SINT; 376 return VK_FORMAT_R16G16_SSCALED;
359 case Maxwell::VertexAttribute::Size::Size_16_16_16: 377 case Maxwell::VertexAttribute::Size::Size_16_16_16:
360 return VK_FORMAT_R16G16B16_SINT; 378 return VK_FORMAT_R16G16B16_SSCALED;
361 case Maxwell::VertexAttribute::Size::Size_16_16_16_16: 379 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
362 return VK_FORMAT_R16G16B16A16_SINT; 380 return VK_FORMAT_R16G16B16A16_SSCALED;
363 case Maxwell::VertexAttribute::Size::Size_32: 381 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
364 return VK_FORMAT_R32_SINT; 382 return VK_FORMAT_A2B10G10R10_SSCALED_PACK32;
365 case Maxwell::VertexAttribute::Size::Size_32_32:
366 return VK_FORMAT_R32G32_SINT;
367 case Maxwell::VertexAttribute::Size::Size_32_32_32:
368 return VK_FORMAT_R32G32B32_SINT;
369 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
370 return VK_FORMAT_R32G32B32A32_SINT;
371 default:
372 break;
373 } 383 }
374 break; 384 break;
375 case Maxwell::VertexAttribute::Type::UnsignedInt: 385 case Maxwell::VertexAttribute::Type::UnsignedInt:
@@ -398,56 +408,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
398 return VK_FORMAT_R32G32B32_UINT; 408 return VK_FORMAT_R32G32B32_UINT;
399 case Maxwell::VertexAttribute::Size::Size_32_32_32_32: 409 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
400 return VK_FORMAT_R32G32B32A32_UINT; 410 return VK_FORMAT_R32G32B32A32_UINT;
401 default: 411 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
402 break; 412 return VK_FORMAT_A2B10G10R10_UINT_PACK32;
403 } 413 }
404 break; 414 break;
405 case Maxwell::VertexAttribute::Type::UnsignedScaled: 415 case Maxwell::VertexAttribute::Type::SignedInt:
406 switch (size) { 416 switch (size) {
407 case Maxwell::VertexAttribute::Size::Size_8: 417 case Maxwell::VertexAttribute::Size::Size_8:
408 return VK_FORMAT_R8_USCALED; 418 return VK_FORMAT_R8_SINT;
409 case Maxwell::VertexAttribute::Size::Size_8_8: 419 case Maxwell::VertexAttribute::Size::Size_8_8:
410 return VK_FORMAT_R8G8_USCALED; 420 return VK_FORMAT_R8G8_SINT;
411 case Maxwell::VertexAttribute::Size::Size_8_8_8: 421 case Maxwell::VertexAttribute::Size::Size_8_8_8:
412 return VK_FORMAT_R8G8B8_USCALED; 422 return VK_FORMAT_R8G8B8_SINT;
413 case Maxwell::VertexAttribute::Size::Size_8_8_8_8: 423 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
414 return VK_FORMAT_R8G8B8A8_USCALED; 424 return VK_FORMAT_R8G8B8A8_SINT;
415 case Maxwell::VertexAttribute::Size::Size_16: 425 case Maxwell::VertexAttribute::Size::Size_16:
416 return VK_FORMAT_R16_USCALED; 426 return VK_FORMAT_R16_SINT;
417 case Maxwell::VertexAttribute::Size::Size_16_16: 427 case Maxwell::VertexAttribute::Size::Size_16_16:
418 return VK_FORMAT_R16G16_USCALED; 428 return VK_FORMAT_R16G16_SINT;
419 case Maxwell::VertexAttribute::Size::Size_16_16_16: 429 case Maxwell::VertexAttribute::Size::Size_16_16_16:
420 return VK_FORMAT_R16G16B16_USCALED; 430 return VK_FORMAT_R16G16B16_SINT;
421 case Maxwell::VertexAttribute::Size::Size_16_16_16_16: 431 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
422 return VK_FORMAT_R16G16B16A16_USCALED; 432 return VK_FORMAT_R16G16B16A16_SINT;
423 default: 433 case Maxwell::VertexAttribute::Size::Size_32:
424 break; 434 return VK_FORMAT_R32_SINT;
435 case Maxwell::VertexAttribute::Size::Size_32_32:
436 return VK_FORMAT_R32G32_SINT;
437 case Maxwell::VertexAttribute::Size::Size_32_32_32:
438 return VK_FORMAT_R32G32B32_SINT;
439 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
440 return VK_FORMAT_R32G32B32A32_SINT;
441 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
442 return VK_FORMAT_A2B10G10R10_SINT_PACK32;
425 } 443 }
426 break; 444 break;
427 case Maxwell::VertexAttribute::Type::SignedScaled: 445 case Maxwell::VertexAttribute::Type::Float:
428 switch (size) { 446 switch (size) {
429 case Maxwell::VertexAttribute::Size::Size_8:
430 return VK_FORMAT_R8_SSCALED;
431 case Maxwell::VertexAttribute::Size::Size_8_8:
432 return VK_FORMAT_R8G8_SSCALED;
433 case Maxwell::VertexAttribute::Size::Size_8_8_8:
434 return VK_FORMAT_R8G8B8_SSCALED;
435 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
436 return VK_FORMAT_R8G8B8A8_SSCALED;
437 case Maxwell::VertexAttribute::Size::Size_16: 447 case Maxwell::VertexAttribute::Size::Size_16:
438 return VK_FORMAT_R16_SSCALED; 448 return VK_FORMAT_R16_SFLOAT;
439 case Maxwell::VertexAttribute::Size::Size_16_16: 449 case Maxwell::VertexAttribute::Size::Size_16_16:
440 return VK_FORMAT_R16G16_SSCALED; 450 return VK_FORMAT_R16G16_SFLOAT;
441 case Maxwell::VertexAttribute::Size::Size_16_16_16: 451 case Maxwell::VertexAttribute::Size::Size_16_16_16:
442 return VK_FORMAT_R16G16B16_SSCALED; 452 return VK_FORMAT_R16G16B16_SFLOAT;
443 case Maxwell::VertexAttribute::Size::Size_16_16_16_16: 453 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
444 return VK_FORMAT_R16G16B16A16_SSCALED; 454 return VK_FORMAT_R16G16B16A16_SFLOAT;
445 default:
446 break;
447 }
448 break;
449 case Maxwell::VertexAttribute::Type::Float:
450 switch (size) {
451 case Maxwell::VertexAttribute::Size::Size_32: 455 case Maxwell::VertexAttribute::Size::Size_32:
452 return VK_FORMAT_R32_SFLOAT; 456 return VK_FORMAT_R32_SFLOAT;
453 case Maxwell::VertexAttribute::Size::Size_32_32: 457 case Maxwell::VertexAttribute::Size::Size_32_32:
@@ -456,16 +460,6 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
456 return VK_FORMAT_R32G32B32_SFLOAT; 460 return VK_FORMAT_R32G32B32_SFLOAT;
457 case Maxwell::VertexAttribute::Size::Size_32_32_32_32: 461 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
458 return VK_FORMAT_R32G32B32A32_SFLOAT; 462 return VK_FORMAT_R32G32B32A32_SFLOAT;
459 case Maxwell::VertexAttribute::Size::Size_16:
460 return VK_FORMAT_R16_SFLOAT;
461 case Maxwell::VertexAttribute::Size::Size_16_16:
462 return VK_FORMAT_R16G16_SFLOAT;
463 case Maxwell::VertexAttribute::Size::Size_16_16_16:
464 return VK_FORMAT_R16G16B16_SFLOAT;
465 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
466 return VK_FORMAT_R16G16B16A16_SFLOAT;
467 default:
468 break;
469 } 463 }
470 break; 464 break;
471 } 465 }
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index cd9673d1f..2d9b18ed9 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -155,11 +155,31 @@ vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatc
155 } 155 }
156 } 156 }
157 157
158 static constexpr std::array layers_data{"VK_LAYER_LUNARG_standard_validation"}; 158 std::vector<const char*> layers;
159 vk::Span<const char*> layers = layers_data; 159 layers.reserve(1);
160 if (!enable_layers) { 160 if (enable_layers) {
161 layers = {}; 161 layers.push_back("VK_LAYER_KHRONOS_validation");
162 }
163
164 const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld);
165 if (!layer_properties) {
166 LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers");
167 layers.clear();
168 }
169
170 for (auto layer_it = layers.begin(); layer_it != layers.end();) {
171 const char* const layer = *layer_it;
172 const auto it = std::find_if(
173 layer_properties->begin(), layer_properties->end(),
174 [layer](const VkLayerProperties& prop) { return !std::strcmp(layer, prop.layerName); });
175 if (it == layer_properties->end()) {
176 LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer);
177 layer_it = layers.erase(layer_it);
178 } else {
179 ++layer_it;
180 }
162 } 181 }
182
163 vk::Instance instance = vk::Instance::Create(layers, extensions, dld); 183 vk::Instance instance = vk::Instance::Create(layers, extensions, dld);
164 if (!instance) { 184 if (!instance) {
165 LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); 185 LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance");
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 1fde38328..2be38d419 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -37,9 +37,9 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch
37 37
38} // Anonymous namespace 38} // Anonymous namespace
39 39
40Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, 40Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
41 std::size_t size) 41 VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size)
42 : VideoCommon::BufferBlock{cpu_addr, size} { 42 : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} {
43 VkBufferCreateInfo ci; 43 VkBufferCreateInfo ci;
44 ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; 44 ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
45 ci.pNext = nullptr; 45 ci.pNext = nullptr;
@@ -56,40 +56,15 @@ Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cp
56 56
57Buffer::~Buffer() = default; 57Buffer::~Buffer() = default;
58 58
59VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, 59void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) {
60 const VKDevice& device, VKMemoryManager& memory_manager,
61 VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
62 : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
63 CreateStreamBuffer(device,
64 scheduler)},
65 device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
66 staging_pool} {}
67
68VKBufferCache::~VKBufferCache() = default;
69
70std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
71 return std::make_shared<Buffer>(device, memory_manager, cpu_addr, size);
72}
73
74VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) {
75 size = std::max(size, std::size_t(4));
76 const auto& empty = staging_pool.GetUnusedBuffer(size, false);
77 scheduler.RequestOutsideRenderPassOperationContext();
78 scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
79 cmdbuf.FillBuffer(buffer, 0, size, 0);
80 });
81 return *empty.handle;
82}
83
84void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
85 const u8* data) {
86 const auto& staging = staging_pool.GetUnusedBuffer(size, true); 60 const auto& staging = staging_pool.GetUnusedBuffer(size, true);
87 std::memcpy(staging.commit->Map(size), data, size); 61 std::memcpy(staging.commit->Map(size), data, size);
88 62
89 scheduler.RequestOutsideRenderPassOperationContext(); 63 scheduler.RequestOutsideRenderPassOperationContext();
90 scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset, 64
91 size](vk::CommandBuffer cmdbuf) { 65 const VkBuffer handle = Handle();
92 cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); 66 scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
67 cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size});
93 68
94 VkBufferMemoryBarrier barrier; 69 VkBufferMemoryBarrier barrier;
95 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; 70 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
@@ -98,7 +73,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
98 barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS; 73 barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS;
99 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 74 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
100 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 75 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
101 barrier.buffer = buffer; 76 barrier.buffer = handle;
102 barrier.offset = offset; 77 barrier.offset = offset;
103 barrier.size = size; 78 barrier.size = size;
104 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, 79 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
@@ -106,12 +81,12 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
106 }); 81 });
107} 82}
108 83
109void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, 84void Buffer::Download(std::size_t offset, std::size_t size, u8* data) {
110 u8* data) {
111 const auto& staging = staging_pool.GetUnusedBuffer(size, true); 85 const auto& staging = staging_pool.GetUnusedBuffer(size, true);
112 scheduler.RequestOutsideRenderPassOperationContext(); 86 scheduler.RequestOutsideRenderPassOperationContext();
113 scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset, 87
114 size](vk::CommandBuffer cmdbuf) { 88 const VkBuffer handle = Handle();
89 scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
115 VkBufferMemoryBarrier barrier; 90 VkBufferMemoryBarrier barrier;
116 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; 91 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
117 barrier.pNext = nullptr; 92 barrier.pNext = nullptr;
@@ -119,7 +94,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
119 barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; 94 barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
120 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 95 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
121 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 96 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
122 barrier.buffer = buffer; 97 barrier.buffer = handle;
123 barrier.offset = offset; 98 barrier.offset = offset;
124 barrier.size = size; 99 barrier.size = size;
125 100
@@ -127,17 +102,19 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
127 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | 102 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
128 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 103 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
129 VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); 104 VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
130 cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size}); 105 cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size});
131 }); 106 });
132 scheduler.Finish(); 107 scheduler.Finish();
133 108
134 std::memcpy(data, staging.commit->Map(size), size); 109 std::memcpy(data, staging.commit->Map(size), size);
135} 110}
136 111
137void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, 112void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
138 std::size_t dst_offset, std::size_t size) { 113 std::size_t size) {
139 scheduler.RequestOutsideRenderPassOperationContext(); 114 scheduler.RequestOutsideRenderPassOperationContext();
140 scheduler.Record([src_buffer = src.Handle(), dst_buffer = dst.Handle(), src_offset, dst_offset, 115
116 const VkBuffer dst_buffer = Handle();
117 scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
141 size](vk::CommandBuffer cmdbuf) { 118 size](vk::CommandBuffer cmdbuf) {
142 cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); 119 cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});
143 120
@@ -165,4 +142,30 @@ void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t
165 }); 142 });
166} 143}
167 144
145VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
146 const VKDevice& device, VKMemoryManager& memory_manager,
147 VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
148 : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
149 CreateStreamBuffer(device,
150 scheduler)},
151 device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
152 staging_pool} {}
153
154VKBufferCache::~VKBufferCache() = default;
155
156std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
157 return std::make_shared<Buffer>(device, memory_manager, scheduler, staging_pool, cpu_addr,
158 size);
159}
160
161VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
162 size = std::max(size, std::size_t(4));
163 const auto& empty = staging_pool.GetUnusedBuffer(size, false);
164 scheduler.RequestOutsideRenderPassOperationContext();
165 scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
166 cmdbuf.FillBuffer(buffer, 0, size, 0);
167 });
168 return {*empty.handle, 0, 0};
169}
170
168} // namespace Vulkan 171} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 9ebbef835..991ee451c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -25,15 +25,29 @@ class VKScheduler;
25 25
26class Buffer final : public VideoCommon::BufferBlock { 26class Buffer final : public VideoCommon::BufferBlock {
27public: 27public:
28 explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, 28 explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler,
29 std::size_t size); 29 VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size);
30 ~Buffer(); 30 ~Buffer();
31 31
32 void Upload(std::size_t offset, std::size_t size, const u8* data);
33
34 void Download(std::size_t offset, std::size_t size, u8* data);
35
36 void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
37 std::size_t size);
38
32 VkBuffer Handle() const { 39 VkBuffer Handle() const {
33 return *buffer.handle; 40 return *buffer.handle;
34 } 41 }
35 42
43 u64 Address() const {
44 return 0;
45 }
46
36private: 47private:
48 VKScheduler& scheduler;
49 VKStagingBufferPool& staging_pool;
50
37 VKBuffer buffer; 51 VKBuffer buffer;
38}; 52};
39 53
@@ -44,20 +58,11 @@ public:
44 VKScheduler& scheduler, VKStagingBufferPool& staging_pool); 58 VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
45 ~VKBufferCache(); 59 ~VKBufferCache();
46 60
47 VkBuffer GetEmptyBuffer(std::size_t size) override; 61 BufferInfo GetEmptyBuffer(std::size_t size) override;
48 62
49protected: 63protected:
50 std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; 64 std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
51 65
52 void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
53 const u8* data) override;
54
55 void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
56 u8* data) override;
57
58 void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
59 std::size_t dst_offset, std::size_t size) override;
60
61private: 66private:
62 const VKDevice& device; 67 const VKDevice& device;
63 VKMemoryManager& memory_manager; 68 VKMemoryManager& memory_manager;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index a77fa35c3..a8d94eac3 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -143,6 +143,49 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
143 } 143 }
144} 144}
145 145
146/// @brief Determine if an attachment to be updated has to preserve contents
147/// @param is_clear True when a clear is being executed
148/// @param regs 3D registers
149/// @return True when the contents have to be preserved
150bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) {
151 if (!is_clear) {
152 return true;
153 }
154 // First we have to make sure all clear masks are enabled.
155 if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B ||
156 !regs.clear_buffers.A) {
157 return true;
158 }
159 // If scissors are disabled, the whole screen is cleared
160 if (!regs.clear_flags.scissor) {
161 return false;
162 }
163 // Then we have to confirm scissor testing clears the whole image
164 const std::size_t index = regs.clear_buffers.RT;
165 const auto& scissor = regs.scissor_test[0];
166 return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width ||
167 scissor.max_y < regs.rt[index].height;
168}
169
170/// @brief Determine if an attachment to be updated has to preserve contents
171/// @param is_clear True when a clear is being executed
172/// @param regs 3D registers
173/// @return True when the contents have to be preserved
174bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) {
175 // If we are not clearing, the contents have to be preserved
176 if (!is_clear) {
177 return true;
178 }
179 // For depth stencil clears we only have to confirm scissor test covers the whole image
180 if (!regs.clear_flags.scissor) {
181 return false;
182 }
183 // Make sure the clear cover the whole image
184 const auto& scissor = regs.scissor_test[0];
185 return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width ||
186 scissor.max_y < regs.zeta_height;
187}
188
146} // Anonymous namespace 189} // Anonymous namespace
147 190
148class BufferBindings final { 191class BufferBindings final {
@@ -344,7 +387,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
344 387
345 buffer_cache.Unmap(); 388 buffer_cache.Unmap();
346 389
347 const Texceptions texceptions = UpdateAttachments(); 390 const Texceptions texceptions = UpdateAttachments(false);
348 SetupImageTransitions(texceptions, color_attachments, zeta_attachment); 391 SetupImageTransitions(texceptions, color_attachments, zeta_attachment);
349 392
350 key.renderpass_params = GetRenderPassParams(texceptions); 393 key.renderpass_params = GetRenderPassParams(texceptions);
@@ -400,7 +443,7 @@ void RasterizerVulkan::Clear() {
400 return; 443 return;
401 } 444 }
402 445
403 [[maybe_unused]] const auto texceptions = UpdateAttachments(); 446 [[maybe_unused]] const auto texceptions = UpdateAttachments(true);
404 DEBUG_ASSERT(texceptions.none()); 447 DEBUG_ASSERT(texceptions.none());
405 SetupImageTransitions(0, color_attachments, zeta_attachment); 448 SetupImageTransitions(0, color_attachments, zeta_attachment);
406 449
@@ -677,9 +720,12 @@ void RasterizerVulkan::FlushWork() {
677 draw_counter = 0; 720 draw_counter = 0;
678} 721}
679 722
680RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { 723RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) {
681 MICROPROFILE_SCOPE(Vulkan_RenderTargets); 724 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
682 auto& dirty = system.GPU().Maxwell3D().dirty.flags; 725 auto& maxwell3d = system.GPU().Maxwell3D();
726 auto& dirty = maxwell3d.dirty.flags;
727 auto& regs = maxwell3d.regs;
728
683 const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; 729 const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
684 dirty[VideoCommon::Dirty::RenderTargets] = false; 730 dirty[VideoCommon::Dirty::RenderTargets] = false;
685 731
@@ -688,7 +734,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
688 Texceptions texceptions; 734 Texceptions texceptions;
689 for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { 735 for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
690 if (update_rendertargets) { 736 if (update_rendertargets) {
691 color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); 737 const bool preserve_contents = HasToPreserveColorContents(is_clear, regs);
738 color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents);
692 } 739 }
693 if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { 740 if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
694 texceptions[rt] = true; 741 texceptions[rt] = true;
@@ -696,7 +743,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
696 } 743 }
697 744
698 if (update_rendertargets) { 745 if (update_rendertargets) {
699 zeta_attachment = texture_cache.GetDepthBufferSurface(true); 746 const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs);
747 zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents);
700 } 748 }
701 if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { 749 if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
702 texceptions[ZETA_TEXCEPTION_INDEX] = true; 750 texceptions[ZETA_TEXCEPTION_INDEX] = true;
@@ -870,10 +918,10 @@ void RasterizerVulkan::BeginTransformFeedback() {
870 UNIMPLEMENTED_IF(binding.buffer_offset != 0); 918 UNIMPLEMENTED_IF(binding.buffer_offset != 0);
871 919
872 const GPUVAddr gpu_addr = binding.Address(); 920 const GPUVAddr gpu_addr = binding.Address();
873 const auto size = static_cast<VkDeviceSize>(binding.buffer_size); 921 const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
874 const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); 922 const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
875 923
876 scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) { 924 scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
877 cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); 925 cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
878 cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); 926 cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
879 }); 927 });
@@ -925,8 +973,8 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
925 buffer_bindings.AddVertexBinding(DefaultBuffer(), 0); 973 buffer_bindings.AddVertexBinding(DefaultBuffer(), 0);
926 continue; 974 continue;
927 } 975 }
928 const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); 976 const auto info = buffer_cache.UploadMemory(start, size);
929 buffer_bindings.AddVertexBinding(buffer, offset); 977 buffer_bindings.AddVertexBinding(info.handle, info.offset);
930 } 978 }
931} 979}
932 980
@@ -948,7 +996,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
948 break; 996 break;
949 } 997 }
950 const GPUVAddr gpu_addr = regs.index_array.IndexStart(); 998 const GPUVAddr gpu_addr = regs.index_array.IndexStart();
951 auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); 999 const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
1000 VkBuffer buffer = info.handle;
1001 u64 offset = info.offset;
952 std::tie(buffer, offset) = quad_indexed_pass.Assemble( 1002 std::tie(buffer, offset) = quad_indexed_pass.Assemble(
953 regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); 1003 regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
954 1004
@@ -962,7 +1012,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
962 break; 1012 break;
963 } 1013 }
964 const GPUVAddr gpu_addr = regs.index_array.IndexStart(); 1014 const GPUVAddr gpu_addr = regs.index_array.IndexStart();
965 auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); 1015 const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
1016 VkBuffer buffer = info.handle;
1017 u64 offset = info.offset;
966 1018
967 auto format = regs.index_array.format; 1019 auto format = regs.index_array.format;
968 const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; 1020 const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
@@ -1109,10 +1161,9 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
1109 Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); 1161 Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
1110 ASSERT(size <= MaxConstbufferSize); 1162 ASSERT(size <= MaxConstbufferSize);
1111 1163
1112 const auto [buffer_handle, offset] = 1164 const auto info =
1113 buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); 1165 buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
1114 1166 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1115 update_descriptor_queue.AddBuffer(buffer_handle, offset, size);
1116} 1167}
1117 1168
1118void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { 1169void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
@@ -1126,14 +1177,14 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
1126 // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the 1177 // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
1127 // default buffer. 1178 // default buffer.
1128 static constexpr std::size_t dummy_size = 4; 1179 static constexpr std::size_t dummy_size = 4;
1129 const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); 1180 const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
1130 update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); 1181 update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
1131 return; 1182 return;
1132 } 1183 }
1133 1184
1134 const auto [buffer, offset] = buffer_cache.UploadMemory( 1185 const auto info = buffer_cache.UploadMemory(
1135 actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); 1186 actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
1136 update_descriptor_queue.AddBuffer(buffer, offset, size); 1187 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1137} 1188}
1138 1189
1139void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, 1190void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
@@ -1154,7 +1205,7 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu
1154 const auto sampler = sampler_cache.GetSampler(texture.tsc); 1205 const auto sampler = sampler_cache.GetSampler(texture.tsc);
1155 update_descriptor_queue.AddSampledImage(sampler, image_view); 1206 update_descriptor_queue.AddSampledImage(sampler, image_view);
1156 1207
1157 const auto image_layout = update_descriptor_queue.GetLastImageLayout(); 1208 VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
1158 *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; 1209 *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
1159 sampled_views.push_back(ImageView{std::move(view), image_layout}); 1210 sampled_views.push_back(ImageView{std::move(view), image_layout});
1160} 1211}
@@ -1180,7 +1231,7 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima
1180 view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source); 1231 view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
1181 update_descriptor_queue.AddImage(image_view); 1232 update_descriptor_queue.AddImage(image_view);
1182 1233
1183 const auto image_layout = update_descriptor_queue.GetLastImageLayout(); 1234 VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
1184 *image_layout = VK_IMAGE_LAYOUT_GENERAL; 1235 *image_layout = VK_IMAGE_LAYOUT_GENERAL;
1185 image_views.push_back(ImageView{std::move(view), image_layout}); 1236 image_views.push_back(ImageView{std::move(view), image_layout});
1186} 1237}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index c8c187606..83e00e7e9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -159,7 +159,10 @@ private:
159 159
160 void FlushWork(); 160 void FlushWork();
161 161
162 Texceptions UpdateAttachments(); 162 /// @brief Updates the currently bound attachments
163 /// @param is_clear True when the framebuffer is updated as a clear
164 /// @return Bitfield of attachments being used as sampled textures
165 Texceptions UpdateAttachments(bool is_clear);
163 166
164 std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass); 167 std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass);
165 168
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 82ec9180e..56524e6f3 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -9,6 +9,7 @@
9#include <utility> 9#include <utility>
10 10
11#include "common/microprofile.h" 11#include "common/microprofile.h"
12#include "common/thread.h"
12#include "video_core/renderer_vulkan/vk_device.h" 13#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/vk_query_cache.h" 14#include "video_core/renderer_vulkan/vk_query_cache.h"
14#include "video_core/renderer_vulkan/vk_resource_manager.h" 15#include "video_core/renderer_vulkan/vk_resource_manager.h"
@@ -133,6 +134,7 @@ void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) {
133} 134}
134 135
135void VKScheduler::WorkerThread() { 136void VKScheduler::WorkerThread() {
137 Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
136 std::unique_lock lock{mutex}; 138 std::unique_lock lock{mutex};
137 do { 139 do {
138 cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); 140 cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index c765c60a0..689f0d276 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -35,10 +35,14 @@ public:
35 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. 35 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
36 void Unmap(u64 size); 36 void Unmap(u64 size);
37 37
38 VkBuffer Handle() const { 38 VkBuffer Handle() const noexcept {
39 return *buffer; 39 return *buffer;
40 } 40 }
41 41
42 u64 Address() const noexcept {
43 return 0;
44 }
45
42private: 46private:
43 struct Watch final { 47 struct Watch final {
44 VKFenceWatch fence; 48 VKFenceWatch fence;
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index 681ecde98..351c048d2 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -24,35 +24,25 @@ void VKUpdateDescriptorQueue::TickFrame() {
24} 24}
25 25
26void VKUpdateDescriptorQueue::Acquire() { 26void VKUpdateDescriptorQueue::Acquire() {
27 entries.clear(); 27 // Minimum number of entries required.
28} 28 // This is the maximum number of entries a single draw call migth use.
29 static constexpr std::size_t MIN_ENTRIES = 0x400;
29 30
30void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, 31 if (payload.size() + MIN_ENTRIES >= payload.max_size()) {
31 VkDescriptorSet set) {
32 if (payload.size() + entries.size() >= payload.max_size()) {
33 LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); 32 LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
34 scheduler.WaitWorker(); 33 scheduler.WaitWorker();
35 payload.clear(); 34 payload.clear();
36 } 35 }
36 upload_start = &*payload.end();
37}
37 38
38 // TODO(Rodrigo): Rework to write the payload directly 39void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
39 const auto payload_start = payload.data() + payload.size(); 40 VkDescriptorSet set) {
40 for (const auto& entry : entries) { 41 const void* const data = upload_start;
41 if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) { 42 const vk::Device* const logical = &device.GetLogical();
42 payload.push_back(*image); 43 scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
43 } else if (const auto buffer = std::get_if<VkDescriptorBufferInfo>(&entry)) { 44 logical->UpdateDescriptorSet(set, update_template, data);
44 payload.push_back(*buffer); 45 });
45 } else if (const auto texel = std::get_if<VkBufferView>(&entry)) {
46 payload.push_back(*texel);
47 } else {
48 UNREACHABLE();
49 }
50 }
51
52 scheduler.Record(
53 [payload_start, set, update_template, logical = &device.GetLogical()](vk::CommandBuffer) {
54 logical->UpdateDescriptorSet(set, update_template, payload_start);
55 });
56} 46}
57 47
58} // namespace Vulkan 48} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index cc7e3dff4..945320c72 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -15,17 +15,13 @@ namespace Vulkan {
15class VKDevice; 15class VKDevice;
16class VKScheduler; 16class VKScheduler;
17 17
18class DescriptorUpdateEntry { 18struct DescriptorUpdateEntry {
19public: 19 DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
20 explicit DescriptorUpdateEntry() {}
21
22 DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {}
23 20
24 DescriptorUpdateEntry(VkDescriptorBufferInfo buffer) : buffer{buffer} {} 21 DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {}
25 22
26 DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {} 23 DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {}
27 24
28private:
29 union { 25 union {
30 VkDescriptorImageInfo image; 26 VkDescriptorImageInfo image;
31 VkDescriptorBufferInfo buffer; 27 VkDescriptorBufferInfo buffer;
@@ -45,32 +41,34 @@ public:
45 void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); 41 void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
46 42
47 void AddSampledImage(VkSampler sampler, VkImageView image_view) { 43 void AddSampledImage(VkSampler sampler, VkImageView image_view) {
48 entries.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); 44 payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}});
49 } 45 }
50 46
51 void AddImage(VkImageView image_view) { 47 void AddImage(VkImageView image_view) {
52 entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); 48 payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
53 } 49 }
54 50
55 void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) { 51 void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) {
56 entries.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); 52 payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});
57 } 53 }
58 54
59 void AddTexelBuffer(VkBufferView texel_buffer) { 55 void AddTexelBuffer(VkBufferView texel_buffer) {
60 entries.emplace_back(texel_buffer); 56 payload.emplace_back(texel_buffer);
61 } 57 }
62 58
63 VkImageLayout* GetLastImageLayout() { 59 VkImageLayout* LastImageLayout() {
64 return &std::get<VkDescriptorImageInfo>(entries.back()).imageLayout; 60 return &payload.back().image.imageLayout;
65 } 61 }
66 62
67private: 63 const VkImageLayout* LastImageLayout() const {
68 using Variant = std::variant<VkDescriptorImageInfo, VkDescriptorBufferInfo, VkBufferView>; 64 return &payload.back().image.imageLayout;
65 }
69 66
67private:
70 const VKDevice& device; 68 const VKDevice& device;
71 VKScheduler& scheduler; 69 VKScheduler& scheduler;
72 70
73 boost::container::static_vector<Variant, 0x400> entries; 71 const DescriptorUpdateEntry* upload_start = nullptr;
74 boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload; 72 boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;
75}; 73};
76 74
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 2ce9b0626..0d485a662 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -153,7 +153,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
153 153
154bool Load(InstanceDispatch& dld) noexcept { 154bool Load(InstanceDispatch& dld) noexcept {
155#define X(name) Proc(dld.name, dld, #name) 155#define X(name) Proc(dld.name, dld, #name)
156 return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties); 156 return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties) &&
157 X(vkEnumerateInstanceLayerProperties);
157#undef X 158#undef X
158} 159}
159 160
@@ -725,8 +726,7 @@ bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR s
725 return supported == VK_TRUE; 726 return supported == VK_TRUE;
726} 727}
727 728
728VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const 729VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const {
729 noexcept {
730 VkSurfaceCapabilitiesKHR capabilities; 730 VkSurfaceCapabilitiesKHR capabilities;
731 Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities)); 731 Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities));
732 return capabilities; 732 return capabilities;
@@ -771,4 +771,17 @@ std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProp
771 return properties; 771 return properties;
772} 772}
773 773
774std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties(
775 const InstanceDispatch& dld) {
776 u32 num;
777 if (dld.vkEnumerateInstanceLayerProperties(&num, nullptr) != VK_SUCCESS) {
778 return std::nullopt;
779 }
780 std::vector<VkLayerProperties> properties(num);
781 if (dld.vkEnumerateInstanceLayerProperties(&num, properties.data()) != VK_SUCCESS) {
782 return std::nullopt;
783 }
784 return properties;
785}
786
774} // namespace Vulkan::vk 787} // namespace Vulkan::vk
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index 98937a77a..d56fdb3f9 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -141,6 +141,7 @@ struct InstanceDispatch {
141 PFN_vkCreateInstance vkCreateInstance; 141 PFN_vkCreateInstance vkCreateInstance;
142 PFN_vkDestroyInstance vkDestroyInstance; 142 PFN_vkDestroyInstance vkDestroyInstance;
143 PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties; 143 PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties;
144 PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties;
144 145
145 PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT; 146 PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT;
146 PFN_vkCreateDevice vkCreateDevice; 147 PFN_vkCreateDevice vkCreateDevice;
@@ -779,7 +780,7 @@ public:
779 780
780 bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const; 781 bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const;
781 782
782 VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const noexcept; 783 VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const;
783 784
784 std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) const; 785 std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) const;
785 786
@@ -996,4 +997,7 @@ private:
996std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties( 997std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties(
997 const InstanceDispatch& dld); 998 const InstanceDispatch& dld);
998 999
1000std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties(
1001 const InstanceDispatch& dld);
1002
999} // namespace Vulkan::vk 1003} // namespace Vulkan::vk
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 848e46874..b2e88fa20 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -13,55 +13,101 @@
13 13
14namespace VideoCommon::Shader { 14namespace VideoCommon::Shader {
15 15
16using std::move;
16using Tegra::Shader::Instruction; 17using Tegra::Shader::Instruction;
17using Tegra::Shader::OpCode; 18using Tegra::Shader::OpCode;
19using Tegra::Shader::PredCondition;
18 20
19u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { 21u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
20 const Instruction instr = {program_code[pc]}; 22 const Instruction instr = {program_code[pc]};
21 const auto opcode = OpCode::Decode(instr); 23 const auto opcode = OpCode::Decode(instr);
22 24
23 if (instr.hset2.ftz == 0) { 25 PredCondition cond;
24 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); 26 bool bf;
27 bool ftz;
28 bool neg_a;
29 bool abs_a;
30 bool neg_b;
31 bool abs_b;
32 switch (opcode->get().GetId()) {
33 case OpCode::Id::HSET2_C:
34 case OpCode::Id::HSET2_IMM:
35 cond = instr.hsetp2.cbuf_and_imm.cond;
36 bf = instr.Bit(53);
37 ftz = instr.Bit(54);
38 neg_a = instr.Bit(43);
39 abs_a = instr.Bit(44);
40 neg_b = instr.Bit(56);
41 abs_b = instr.Bit(54);
42 break;
43 case OpCode::Id::HSET2_R:
44 cond = instr.hsetp2.reg.cond;
45 bf = instr.Bit(49);
46 ftz = instr.Bit(50);
47 neg_a = instr.Bit(43);
48 abs_a = instr.Bit(44);
49 neg_b = instr.Bit(31);
50 abs_b = instr.Bit(30);
51 break;
52 default:
53 UNREACHABLE();
25 } 54 }
26 55
27 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); 56 Node op_b = [this, instr, opcode] {
28 op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
29
30 Node op_b = [&]() {
31 switch (opcode->get().GetId()) { 57 switch (opcode->get().GetId()) {
58 case OpCode::Id::HSET2_C:
59 // Inform as unimplemented as this is not tested.
60 UNIMPLEMENTED_MSG("HSET2_C is not implemented");
61 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
32 case OpCode::Id::HSET2_R: 62 case OpCode::Id::HSET2_R:
33 return GetRegister(instr.gpr20); 63 return GetRegister(instr.gpr20);
64 case OpCode::Id::HSET2_IMM:
65 return UnpackHalfImmediate(instr, true);
34 default: 66 default:
35 UNREACHABLE(); 67 UNREACHABLE();
36 return Immediate(0); 68 return Node{};
37 } 69 }
38 }(); 70 }();
39 op_b = UnpackHalfFloat(op_b, instr.hset2.type_b);
40 op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b);
41 71
42 const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); 72 if (!ftz) {
73 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
74 }
75
76 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
77 op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a);
78
79 switch (opcode->get().GetId()) {
80 case OpCode::Id::HSET2_R:
81 op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b);
82 [[fallthrough]];
83 case OpCode::Id::HSET2_C:
84 op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b);
85 break;
86 default:
87 break;
88 }
43 89
44 const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b); 90 Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
91
92 Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b);
45 93
46 const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); 94 const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
47 95
48 // HSET2 operates on each half float in the pack. 96 // HSET2 operates on each half float in the pack.
49 std::array<Node, 2> values; 97 std::array<Node, 2> values;
50 for (u32 i = 0; i < 2; ++i) { 98 for (u32 i = 0; i < 2; ++i) {
51 const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff; 99 const u32 raw_value = bf ? 0x3c00 : 0xffff;
52 const Node true_value = Immediate(raw_value << (i * 16)); 100 Node true_value = Immediate(raw_value << (i * 16));
53 const Node false_value = Immediate(0); 101 Node false_value = Immediate(0);
54
55 const Node comparison =
56 Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
57 const Node predicate = Operation(combiner, comparison, second_pred);
58 102
103 Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
104 Node predicate = Operation(combiner, comparison, second_pred);
59 values[i] = 105 values[i] =
60 Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value); 106 Operation(OperationCode::Select, predicate, move(true_value), move(false_value));
61 } 107 }
62 108
63 const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]); 109 Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]);
64 SetRegister(bb, instr.gpr0, value); 110 SetRegister(bb, instr.gpr0, move(value));
65 111
66 return pc; 112 return pc;
67} 113}
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 60b6ad72a..07778dc3e 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -97,6 +97,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
97 break; 97 break;
98 case TextureFormat::B5G6R5: 98 case TextureFormat::B5G6R5:
99 case TextureFormat::B6G5R5: 99 case TextureFormat::B6G5R5:
100 case TextureFormat::BF10GF11RF11:
100 if (component == 0) { 101 if (component == 0) {
101 return descriptor.b_type; 102 return descriptor.b_type;
102 } 103 }
@@ -119,7 +120,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
119 } 120 }
120 break; 121 break;
121 } 122 }
122 UNIMPLEMENTED_MSG("texture format not implement={}", format); 123 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
123 return ComponentType::FLOAT; 124 return ComponentType::FLOAT;
124} 125}
125 126
@@ -191,6 +192,14 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
191 return 6; 192 return 6;
192 } 193 }
193 return 0; 194 return 0;
195 case TextureFormat::BF10GF11RF11:
196 if (component == 1 || component == 2) {
197 return 11;
198 }
199 if (component == 0) {
200 return 10;
201 }
202 return 0;
194 case TextureFormat::G8R24: 203 case TextureFormat::G8R24:
195 if (component == 0) { 204 if (component == 0) {
196 return 8; 205 return 8;
@@ -211,10 +220,9 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
211 return (component == 0 || component == 1) ? 8 : 0; 220 return (component == 0 || component == 1) ? 8 : 0;
212 case TextureFormat::G4R4: 221 case TextureFormat::G4R4:
213 return (component == 0 || component == 1) ? 4 : 0; 222 return (component == 0 || component == 1) ? 4 : 0;
214 default:
215 UNIMPLEMENTED_MSG("texture format not implement={}", format);
216 return 0;
217 } 223 }
224 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
225 return 0;
218} 226}
219 227
220std::size_t GetImageComponentMask(TextureFormat format) { 228std::size_t GetImageComponentMask(TextureFormat format) {
@@ -235,6 +243,7 @@ std::size_t GetImageComponentMask(TextureFormat format) {
235 case TextureFormat::R32_B24G8: 243 case TextureFormat::R32_B24G8:
236 case TextureFormat::B5G6R5: 244 case TextureFormat::B5G6R5:
237 case TextureFormat::B6G5R5: 245 case TextureFormat::B6G5R5:
246 case TextureFormat::BF10GF11RF11:
238 return std::size_t{R | G | B}; 247 return std::size_t{R | G | B};
239 case TextureFormat::R32_G32: 248 case TextureFormat::R32_G32:
240 case TextureFormat::R16_G16: 249 case TextureFormat::R16_G16:
@@ -248,10 +257,9 @@ std::size_t GetImageComponentMask(TextureFormat format) {
248 case TextureFormat::R8: 257 case TextureFormat::R8:
249 case TextureFormat::R1: 258 case TextureFormat::R1:
250 return std::size_t{R}; 259 return std::size_t{R};
251 default:
252 UNIMPLEMENTED_MSG("texture format not implement={}", format);
253 return std::size_t{R | G | B | A};
254 } 260 }
261 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
262 return std::size_t{R | G | B | A};
255} 263}
256 264
257std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { 265std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
@@ -299,7 +307,7 @@ std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type,
299 return {std::move(original_value), true}; 307 return {std::move(original_value), true};
300 } 308 }
301 default: 309 default:
302 UNIMPLEMENTED_MSG("Unimplement component type={}", component_type); 310 UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type);
303 return {std::move(original_value), true}; 311 return {std::move(original_value), true};
304 } 312 }
305} 313}
@@ -459,7 +467,7 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
459 default: 467 default:
460 break; 468 break;
461 } 469 }
462 UNIMPLEMENTED_MSG("Unimplemented operation={} type={}", 470 UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}",
463 static_cast<u64>(instr.suatom_d.operation.Value()), 471 static_cast<u64>(instr.suatom_d.operation.Value()),
464 static_cast<u64>(instr.suatom_d.operation_type.Value())); 472 static_cast<u64>(instr.suatom_d.operation_type.Value()));
465 return OperationCode::AtomicImageAdd; 473 return OperationCode::AtomicImageAdd;
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 94d3a6ae5..0caf3b4f0 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -120,6 +120,9 @@ std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
120 } 120 }
121 const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)}; 121 const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
122 const auto layer{static_cast<u32>(relative_address / layer_size)}; 122 const auto layer{static_cast<u32>(relative_address / layer_size)};
123 if (layer >= params.depth) {
124 return {};
125 }
123 const GPUVAddr mipmap_address = relative_address - layer_size * layer; 126 const GPUVAddr mipmap_address = relative_address - layer_size * layer;
124 const auto mipmap_it = 127 const auto mipmap_it =
125 Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); 128 Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index b543fc8c0..6207d8dfe 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -24,6 +24,7 @@
24#include "core/core.h" 24#include "core/core.h"
25#include "core/memory.h" 25#include "core/memory.h"
26#include "core/settings.h" 26#include "core/settings.h"
27#include "video_core/compatible_formats.h"
27#include "video_core/dirty_flags.h" 28#include "video_core/dirty_flags.h"
28#include "video_core/engines/fermi_2d.h" 29#include "video_core/engines/fermi_2d.h"
29#include "video_core/engines/maxwell_3d.h" 30#include "video_core/engines/maxwell_3d.h"
@@ -47,8 +48,8 @@ class RasterizerInterface;
47 48
48namespace VideoCommon { 49namespace VideoCommon {
49 50
51using VideoCore::Surface::FormatCompatibility;
50using VideoCore::Surface::PixelFormat; 52using VideoCore::Surface::PixelFormat;
51
52using VideoCore::Surface::SurfaceTarget; 53using VideoCore::Surface::SurfaceTarget;
53using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; 54using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
54 55
@@ -595,7 +596,7 @@ private:
595 } else { 596 } else {
596 new_surface = GetUncachedSurface(gpu_addr, params); 597 new_surface = GetUncachedSurface(gpu_addr, params);
597 } 598 }
598 const auto& final_params = new_surface->GetSurfaceParams(); 599 const SurfaceParams& final_params = new_surface->GetSurfaceParams();
599 if (cr_params.type != final_params.type) { 600 if (cr_params.type != final_params.type) {
600 if (Settings::IsGPULevelExtreme()) { 601 if (Settings::IsGPULevelExtreme()) {
601 BufferCopy(current_surface, new_surface); 602 BufferCopy(current_surface, new_surface);
@@ -603,7 +604,7 @@ private:
603 } else { 604 } else {
604 std::vector<CopyParams> bricks = current_surface->BreakDown(final_params); 605 std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
605 for (auto& brick : bricks) { 606 for (auto& brick : bricks) {
606 ImageCopy(current_surface, new_surface, brick); 607 TryCopyImage(current_surface, new_surface, brick);
607 } 608 }
608 } 609 }
609 Unregister(current_surface); 610 Unregister(current_surface);
@@ -694,7 +695,7 @@ private:
694 } 695 }
695 const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height, 696 const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height,
696 src_params.depth); 697 src_params.depth);
697 ImageCopy(surface, new_surface, copy_params); 698 TryCopyImage(surface, new_surface, copy_params);
698 } 699 }
699 } 700 }
700 if (passed_tests == 0) { 701 if (passed_tests == 0) {
@@ -791,7 +792,7 @@ private:
791 const u32 width = params.width; 792 const u32 width = params.width;
792 const u32 height = params.height; 793 const u32 height = params.height;
793 const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); 794 const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1);
794 ImageCopy(surface, new_surface, copy_params); 795 TryCopyImage(surface, new_surface, copy_params);
795 } 796 }
796 for (const auto& surface : overlaps) { 797 for (const auto& surface : overlaps) {
797 Unregister(surface); 798 Unregister(surface);
@@ -1053,7 +1054,7 @@ private:
1053 void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, 1054 void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
1054 const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { 1055 const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
1055 auto deduced_src = DeduceSurface(src_gpu_addr, src_params); 1056 auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
1056 auto deduced_dst = DeduceSurface(src_gpu_addr, src_params); 1057 auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params);
1057 if (deduced_src.Failed() || deduced_dst.Failed()) { 1058 if (deduced_src.Failed() || deduced_dst.Failed()) {
1058 return; 1059 return;
1059 } 1060 }
@@ -1192,6 +1193,19 @@ private:
1192 return {}; 1193 return {};
1193 } 1194 }
1194 1195
1196 /// Try to do an image copy logging when formats are incompatible.
1197 void TryCopyImage(TSurface& src, TSurface& dst, const CopyParams& copy) {
1198 const SurfaceParams& src_params = src->GetSurfaceParams();
1199 const SurfaceParams& dst_params = dst->GetSurfaceParams();
1200 if (!format_compatibility.TestCopy(src_params.pixel_format, dst_params.pixel_format)) {
1201 LOG_ERROR(HW_GPU, "Illegal copy between formats={{{}, {}}}",
1202 static_cast<int>(dst_params.pixel_format),
1203 static_cast<int>(src_params.pixel_format));
1204 return;
1205 }
1206 ImageCopy(src, dst, copy);
1207 }
1208
1195 constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { 1209 constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {
1196 return siblings_table[static_cast<std::size_t>(format)]; 1210 return siblings_table[static_cast<std::size_t>(format)];
1197 } 1211 }
@@ -1241,6 +1255,7 @@ private:
1241 VideoCore::RasterizerInterface& rasterizer; 1255 VideoCore::RasterizerInterface& rasterizer;
1242 1256
1243 FormatLookupTable format_lookup_table; 1257 FormatLookupTable format_lookup_table;
1258 FormatCompatibility format_compatibility;
1244 1259
1245 u64 ticks{}; 1260 u64 ticks{};
1246 1261