summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/core/hle/service/am/am.cpp4
-rw-r--r--src/core/hle/service/audio/audren_u.cpp4
-rw-r--r--src/core/hle/service/es/es.cpp1
-rw-r--r--src/core/hle/service/hid/controllers/npad.cpp2
-rw-r--r--src/video_core/dma_pusher.cpp4
-rw-r--r--src/video_core/dma_pusher.h11
-rw-r--r--src/video_core/gpu.cpp2
-rw-r--r--src/video_core/renderer_vulkan/shaders/quad_indexed.comp50
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp205
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.h19
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp17
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.cpp76
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h5
14 files changed, 343 insertions, 58 deletions
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 557608e76..3ece2cf3c 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -903,7 +903,7 @@ private:
903 void PopOutData(Kernel::HLERequestContext& ctx) { 903 void PopOutData(Kernel::HLERequestContext& ctx) {
904 LOG_DEBUG(Service_AM, "called"); 904 LOG_DEBUG(Service_AM, "called");
905 905
906 const auto storage = applet->GetBroker().PopNormalDataToGame(); 906 auto storage = applet->GetBroker().PopNormalDataToGame();
907 if (storage == nullptr) { 907 if (storage == nullptr) {
908 LOG_ERROR(Service_AM, 908 LOG_ERROR(Service_AM,
909 "storage is a nullptr. There is no data in the current normal channel"); 909 "storage is a nullptr. There is no data in the current normal channel");
@@ -934,7 +934,7 @@ private:
934 void PopInteractiveOutData(Kernel::HLERequestContext& ctx) { 934 void PopInteractiveOutData(Kernel::HLERequestContext& ctx) {
935 LOG_DEBUG(Service_AM, "called"); 935 LOG_DEBUG(Service_AM, "called");
936 936
937 const auto storage = applet->GetBroker().PopInteractiveDataToGame(); 937 auto storage = applet->GetBroker().PopInteractiveDataToGame();
938 if (storage == nullptr) { 938 if (storage == nullptr) {
939 LOG_ERROR(Service_AM, 939 LOG_ERROR(Service_AM,
940 "storage is a nullptr. There is no data in the current interactive channel"); 940 "storage is a nullptr. There is no data in the current interactive channel");
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 175cabf45..07dd2caec 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -92,7 +92,7 @@ private:
92 } 92 }
93 93
94 void RequestUpdateImpl(Kernel::HLERequestContext& ctx) { 94 void RequestUpdateImpl(Kernel::HLERequestContext& ctx) {
95 LOG_WARNING(Service_Audio, "(STUBBED) called"); 95 LOG_DEBUG(Service_Audio, "(STUBBED) called");
96 96
97 ctx.WriteBuffer(renderer->UpdateAudioRenderer(ctx.ReadBuffer())); 97 ctx.WriteBuffer(renderer->UpdateAudioRenderer(ctx.ReadBuffer()));
98 IPC::ResponseBuilder rb{ctx, 2}; 98 IPC::ResponseBuilder rb{ctx, 2};
@@ -252,8 +252,6 @@ private:
252 } 252 }
253 253
254 void GetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) { 254 void GetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) {
255 IPC::RequestParser rp{ctx};
256
257 const auto device_name_buffer = ctx.ReadBuffer(); 255 const auto device_name_buffer = ctx.ReadBuffer();
258 const std::string name = Common::StringFromBuffer(device_name_buffer); 256 const std::string name = Common::StringFromBuffer(device_name_buffer);
259 257
diff --git a/src/core/hle/service/es/es.cpp b/src/core/hle/service/es/es.cpp
index df00ae625..86f36915a 100644
--- a/src/core/hle/service/es/es.cpp
+++ b/src/core/hle/service/es/es.cpp
@@ -76,7 +76,6 @@ private:
76 } 76 }
77 77
78 void ImportTicket(Kernel::HLERequestContext& ctx) { 78 void ImportTicket(Kernel::HLERequestContext& ctx) {
79 IPC::RequestParser rp{ctx};
80 const auto ticket = ctx.ReadBuffer(); 79 const auto ticket = ctx.ReadBuffer();
81 const auto cert = ctx.ReadBuffer(1); 80 const auto cert = ctx.ReadBuffer(1);
82 81
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index 2ccfffc19..c55d900e2 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -502,7 +502,7 @@ void Controller_NPad::SetNpadMode(u32 npad_id, NPadAssignments assignment_mode)
502 502
503void Controller_NPad::VibrateController(const std::vector<u32>& controller_ids, 503void Controller_NPad::VibrateController(const std::vector<u32>& controller_ids,
504 const std::vector<Vibration>& vibrations) { 504 const std::vector<Vibration>& vibrations) {
505 LOG_WARNING(Service_HID, "(STUBBED) called"); 505 LOG_DEBUG(Service_HID, "(STUBBED) called");
506 506
507 if (!can_controllers_vibrate) { 507 if (!can_controllers_vibrate) {
508 return; 508 return;
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 713c14182..0b77afc71 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -12,7 +12,7 @@
12 12
13namespace Tegra { 13namespace Tegra {
14 14
15DmaPusher::DmaPusher(GPU& gpu) : gpu(gpu) {} 15DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {}
16 16
17DmaPusher::~DmaPusher() = default; 17DmaPusher::~DmaPusher() = default;
18 18
@@ -26,7 +26,7 @@ void DmaPusher::DispatchCalls() {
26 26
27 dma_pushbuffer_subindex = 0; 27 dma_pushbuffer_subindex = 0;
28 28
29 while (Core::System::GetInstance().IsPoweredOn()) { 29 while (system.IsPoweredOn()) {
30 if (!Step()) { 30 if (!Step()) {
31 break; 31 break;
32 } 32 }
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 6ab06518f..d6188614a 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -10,6 +10,10 @@
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12 12
13namespace Core {
14class System;
15}
16
13namespace Tegra { 17namespace Tegra {
14 18
15enum class SubmissionMode : u32 { 19enum class SubmissionMode : u32 {
@@ -56,7 +60,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>;
56 */ 60 */
57class DmaPusher { 61class DmaPusher {
58public: 62public:
59 explicit DmaPusher(GPU& gpu); 63 explicit DmaPusher(Core::System& system, GPU& gpu);
60 ~DmaPusher(); 64 ~DmaPusher();
61 65
62 void Push(CommandList&& entries) { 66 void Push(CommandList&& entries) {
@@ -72,8 +76,6 @@ private:
72 76
73 void CallMethod(u32 argument) const; 77 void CallMethod(u32 argument) const;
74 78
75 GPU& gpu;
76
77 std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once 79 std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once
78 80
79 std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed 81 std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed
@@ -92,6 +94,9 @@ private:
92 94
93 GPUVAddr dma_mget{}; ///< main pushbuffer last read address 95 GPUVAddr dma_mget{}; ///< main pushbuffer last read address
94 bool ib_enable{true}; ///< IB mode enabled 96 bool ib_enable{true}; ///< IB mode enabled
97
98 GPU& gpu;
99 Core::System& system;
95}; 100};
96 101
97} // namespace Tegra 102} // namespace Tegra
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 8acf2eda2..a606f4abd 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -27,7 +27,7 @@ GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& render
27 : system{system}, renderer{std::move(renderer_)}, is_async{is_async} { 27 : system{system}, renderer{std::move(renderer_)}, is_async{is_async} {
28 auto& rasterizer{renderer->Rasterizer()}; 28 auto& rasterizer{renderer->Rasterizer()};
29 memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); 29 memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
30 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); 30 dma_pusher = std::make_unique<Tegra::DmaPusher>(system, *this);
31 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); 31 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
32 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); 32 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);
33 kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); 33 kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
diff --git a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp b/src/video_core/renderer_vulkan/shaders/quad_indexed.comp
new file mode 100644
index 000000000..5a472ba9b
--- /dev/null
+++ b/src/video_core/renderer_vulkan/shaders/quad_indexed.comp
@@ -0,0 +1,50 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5/*
6 * Build instructions:
7 * $ glslangValidator -V quad_indexed.comp -o output.spv
8 * $ spirv-opt -O --strip-debug output.spv -o optimized.spv
9 * $ xxd -i optimized.spv
10 *
11 * Then copy that bytecode to the C++ file
12 */
13
14#version 460 core
15
16layout (local_size_x = 1024) in;
17
18layout (std430, set = 0, binding = 0) readonly buffer InputBuffer {
19 uint input_indexes[];
20};
21
22layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
23 uint output_indexes[];
24};
25
26layout (push_constant) uniform PushConstants {
27 uint base_vertex;
28 int index_shift; // 0: uint8, 1: uint16, 2: uint32
29};
30
31void main() {
32 int primitive = int(gl_GlobalInvocationID.x);
33 if (primitive * 6 >= output_indexes.length()) {
34 return;
35 }
36
37 int index_size = 8 << index_shift;
38 int flipped_shift = 2 - index_shift;
39 int mask = (1 << flipped_shift) - 1;
40
41 const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3);
42 for (uint vertex = 0; vertex < 6; ++vertex) {
43 int offset = primitive * 4 + quad_swizzle[vertex];
44 int int_offset = offset >> flipped_shift;
45 int bit_offset = (offset & mask) * index_size;
46 uint packed_input = input_indexes[int_offset];
47 uint index = bitfieldExtract(packed_input, bit_offset, index_size);
48 output_indexes[primitive * 6 + vertex] = index + base_vertex;
49 }
50}
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 878a78755..7b0268033 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -135,11 +135,11 @@ VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEnt
135 return entry; 135 return entry;
136} 136}
137 137
138VkPushConstantRange BuildQuadArrayPassPushConstantRange() { 138VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
139 VkPushConstantRange range; 139 VkPushConstantRange range;
140 range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; 140 range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
141 range.offset = 0; 141 range.offset = 0;
142 range.size = sizeof(u32); 142 range.size = static_cast<u32>(size);
143 return range; 143 return range;
144} 144}
145 145
@@ -220,7 +220,130 @@ constexpr u8 uint8_pass[] = {
220 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 220 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00,
221 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; 221 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
222 222
223std::array<VkDescriptorSetLayoutBinding, 2> BuildUint8PassDescriptorSetBindings() { 223// Quad indexed SPIR-V module. Generated from the "shaders/" directory.
224constexpr u8 QUAD_INDEXED_SPV[] = {
225 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00,
226 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
227 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
228 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
229 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
230 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
231 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
232 0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
233 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
234 0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
235 0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
236 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
237 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
238 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
239 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
240 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
241 0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00,
242 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
243 0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
244 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
245 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00,
246 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
247 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
248 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
249 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00,
250 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
251 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
252 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
253 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
254 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
255 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
256 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
257 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
258 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
259 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00,
260 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
261 0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
262 0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
263 0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
264 0x21, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
265 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
266 0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
267 0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
268 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00,
269 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
270 0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
271 0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
272 0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00,
273 0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
274 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
275 0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00,
276 0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00,
277 0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
278 0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
279 0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00,
280 0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
281 0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00,
282 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
283 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
284 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
285 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00,
286 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00,
287 0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
288 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
289 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00,
290 0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00,
291 0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00,
292 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00,
293 0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00,
294 0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
295 0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
296 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
297 0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
298 0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
299 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
300 0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
301 0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00,
302 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
303 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
304 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
305 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
306 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
307 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
308 0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
309 0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
310 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00,
311 0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
312 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00,
313 0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
314 0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
315 0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00,
316 0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
317 0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00,
318 0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
319 0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
320 0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00,
321 0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
322 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
323 0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
324 0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
325 0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00,
326 0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
327 0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
328 0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
329 0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00,
330 0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00,
331 0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
332 0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
333 0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00,
334 0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
335 0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00,
336 0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
337 0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00,
338 0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00,
339 0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
340 0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
341 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00,
342 0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00,
343 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00,
344 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
345
346std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
224 std::array<VkDescriptorSetLayoutBinding, 2> bindings; 347 std::array<VkDescriptorSetLayoutBinding, 2> bindings;
225 bindings[0].binding = 0; 348 bindings[0].binding = 0;
226 bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; 349 bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
@@ -235,7 +358,7 @@ std::array<VkDescriptorSetLayoutBinding, 2> BuildUint8PassDescriptorSetBindings(
235 return bindings; 358 return bindings;
236} 359}
237 360
238VkDescriptorUpdateTemplateEntryKHR BuildUint8PassDescriptorUpdateTemplateEntry() { 361VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
239 VkDescriptorUpdateTemplateEntryKHR entry; 362 VkDescriptorUpdateTemplateEntryKHR entry;
240 entry.dstBinding = 0; 363 entry.dstBinding = 0;
241 entry.dstArrayElement = 0; 364 entry.dstArrayElement = 0;
@@ -337,14 +460,14 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
337 VKUpdateDescriptorQueue& update_descriptor_queue) 460 VKUpdateDescriptorQueue& update_descriptor_queue)
338 : VKComputePass(device, descriptor_pool, BuildQuadArrayPassDescriptorSetLayoutBinding(), 461 : VKComputePass(device, descriptor_pool, BuildQuadArrayPassDescriptorSetLayoutBinding(),
339 BuildQuadArrayPassDescriptorUpdateTemplateEntry(), 462 BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
340 BuildQuadArrayPassPushConstantRange(), std::size(quad_array), quad_array), 463 BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array),
341 scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, 464 scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
342 update_descriptor_queue{update_descriptor_queue} {} 465 update_descriptor_queue{update_descriptor_queue} {}
343 466
344QuadArrayPass::~QuadArrayPass() = default; 467QuadArrayPass::~QuadArrayPass() = default;
345 468
346std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { 469std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
347 const u32 num_triangle_vertices = num_vertices * 6 / 4; 470 const u32 num_triangle_vertices = (num_vertices / 4) * 6;
348 const std::size_t staging_size = num_triangle_vertices * sizeof(u32); 471 const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
349 auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); 472 auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
350 473
@@ -383,8 +506,8 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32
383Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler, 506Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
384 VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, 507 VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool,
385 VKUpdateDescriptorQueue& update_descriptor_queue) 508 VKUpdateDescriptorQueue& update_descriptor_queue)
386 : VKComputePass(device, descriptor_pool, BuildUint8PassDescriptorSetBindings(), 509 : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
387 BuildUint8PassDescriptorUpdateTemplateEntry(), {}, std::size(uint8_pass), 510 BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass),
388 uint8_pass), 511 uint8_pass),
389 scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, 512 scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
390 update_descriptor_queue{update_descriptor_queue} {} 513 update_descriptor_queue{update_descriptor_queue} {}
@@ -425,4 +548,70 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff
425 return {*buffer.handle, 0}; 548 return {*buffer.handle, 0};
426} 549}
427 550
551QuadIndexedPass::QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler,
552 VKDescriptorPool& descriptor_pool,
553 VKStagingBufferPool& staging_buffer_pool,
554 VKUpdateDescriptorQueue& update_descriptor_queue)
555 : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
556 BuildInputOutputDescriptorUpdateTemplate(),
557 BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV),
558 QUAD_INDEXED_SPV),
559 scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
560 update_descriptor_queue{update_descriptor_queue} {}
561
562QuadIndexedPass::~QuadIndexedPass() = default;
563
564std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
565 Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex,
566 VkBuffer src_buffer, u64 src_offset) {
567 const u32 index_shift = [index_format] {
568 switch (index_format) {
569 case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte:
570 return 0;
571 case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedShort:
572 return 1;
573 case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedInt:
574 return 2;
575 }
576 UNREACHABLE();
577 return 2;
578 }();
579 const u32 input_size = num_vertices << index_shift;
580 const u32 num_tri_vertices = (num_vertices / 4) * 6;
581
582 const std::size_t staging_size = num_tri_vertices * sizeof(u32);
583 auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
584
585 update_descriptor_queue.Acquire();
586 update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
587 update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
588 const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
589
590 scheduler.RequestOutsideRenderPassOperationContext();
591 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
592 num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) {
593 static constexpr u32 dispatch_size = 1024;
594 const std::array push_constants = {base_vertex, index_shift};
595 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
596 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
597 cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
598 &push_constants);
599 cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1);
600
601 VkBufferMemoryBarrier barrier;
602 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
603 barrier.pNext = nullptr;
604 barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
605 barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
606 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
607 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
608 barrier.buffer = buffer;
609 barrier.offset = 0;
610 barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32));
611 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
612 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
613 });
614 return {*buffer.handle, 0};
615}
616
428} // namespace Vulkan 617} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index ec80c8683..26bf834de 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -8,6 +8,7 @@
8#include <utility> 8#include <utility>
9#include <vector> 9#include <vector>
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/engines/maxwell_3d.h"
11#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 12#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
12#include "video_core/renderer_vulkan/wrapper.h" 13#include "video_core/renderer_vulkan/wrapper.h"
13 14
@@ -73,4 +74,22 @@ private:
73 VKUpdateDescriptorQueue& update_descriptor_queue; 74 VKUpdateDescriptorQueue& update_descriptor_queue;
74}; 75};
75 76
77class QuadIndexedPass final : public VKComputePass {
78public:
79 explicit QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler,
80 VKDescriptorPool& descriptor_pool,
81 VKStagingBufferPool& staging_buffer_pool,
82 VKUpdateDescriptorQueue& update_descriptor_queue);
83 ~QuadIndexedPass();
84
85 std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format,
86 u32 num_vertices, u32 base_vertex, VkBuffer src_buffer,
87 u64 src_offset);
88
89private:
90 VKScheduler& scheduler;
91 VKStagingBufferPool& staging_buffer_pool;
92 VKUpdateDescriptorQueue& update_descriptor_queue;
93};
94
76} // namespace Vulkan 95} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 4ca0febb8..857bea19f 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -293,6 +293,7 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
293 update_descriptor_queue(device, scheduler), renderpass_cache(device), 293 update_descriptor_queue(device, scheduler), renderpass_cache(device),
294 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 294 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
295 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 295 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
296 quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
296 texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, 297 texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
297 staging_pool), 298 staging_pool),
298 pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue, 299 pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
@@ -844,18 +845,26 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
844 bool is_indexed) { 845 bool is_indexed) {
845 const auto& regs = system.GPU().Maxwell3D().regs; 846 const auto& regs = system.GPU().Maxwell3D().regs;
846 switch (regs.draw.topology) { 847 switch (regs.draw.topology) {
847 case Maxwell::PrimitiveTopology::Quads: 848 case Maxwell::PrimitiveTopology::Quads: {
848 if (params.is_indexed) { 849 if (!params.is_indexed) {
849 UNIMPLEMENTED();
850 } else {
851 const auto [buffer, offset] = 850 const auto [buffer, offset] =
852 quad_array_pass.Assemble(params.num_vertices, params.base_vertex); 851 quad_array_pass.Assemble(params.num_vertices, params.base_vertex);
853 buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); 852 buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
854 params.base_vertex = 0; 853 params.base_vertex = 0;
855 params.num_vertices = params.num_vertices * 6 / 4; 854 params.num_vertices = params.num_vertices * 6 / 4;
856 params.is_indexed = true; 855 params.is_indexed = true;
856 break;
857 } 857 }
858 const GPUVAddr gpu_addr = regs.index_array.IndexStart();
859 auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
860 std::tie(buffer, offset) = quad_indexed_pass.Assemble(
861 regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
862
863 buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
864 params.num_vertices = (params.num_vertices / 4) * 6;
865 params.base_vertex = 0;
858 break; 866 break;
867 }
859 default: { 868 default: {
860 if (!is_indexed) { 869 if (!is_indexed) {
861 break; 870 break;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 46037860a..d9108f862 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -254,6 +254,7 @@ private:
254 VKUpdateDescriptorQueue update_descriptor_queue; 254 VKUpdateDescriptorQueue update_descriptor_queue;
255 VKRenderPassCache renderpass_cache; 255 VKRenderPassCache renderpass_cache;
256 QuadArrayPass quad_array_pass; 256 QuadArrayPass quad_array_pass;
257 QuadIndexedPass quad_indexed_pass;
257 Uint8Pass uint8_pass; 258 Uint8Pass uint8_pass;
258 259
259 VKTextureCache texture_cache; 260 VKTextureCache texture_cache;
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 38a93a01a..868447af2 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <limits>
6#include <optional> 7#include <optional>
7#include <tuple> 8#include <tuple>
8#include <vector> 9#include <vector>
@@ -22,22 +23,38 @@ namespace {
22constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; 23constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
23constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; 24constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
24 25
25constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024; 26constexpr u64 PREFERRED_STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
26 27
27std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter, 28/// Find a memory type with the passed requirements
28 VkMemoryPropertyFlags wanted) { 29std::optional<u32> FindMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
29 const auto properties = device.GetPhysical().GetMemoryProperties(); 30 VkMemoryPropertyFlags wanted,
30 for (u32 i = 0; i < properties.memoryTypeCount; i++) { 31 u32 filter = std::numeric_limits<u32>::max()) {
31 if (!(filter & (1 << i))) { 32 for (u32 i = 0; i < properties.memoryTypeCount; ++i) {
32 continue; 33 const auto flags = properties.memoryTypes[i].propertyFlags;
33 } 34 if ((flags & wanted) == wanted && (filter & (1U << i)) != 0) {
34 if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) {
35 return i; 35 return i;
36 } 36 }
37 } 37 }
38 return std::nullopt; 38 return std::nullopt;
39} 39}
40 40
41/// Get the preferred host visible memory type.
42u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
43 u32 filter = std::numeric_limits<u32>::max()) {
44 // Prefer device local host visible allocations. Both AMD and Nvidia now provide one.
45 // Otherwise search for a host visible allocation.
46 static constexpr auto HOST_MEMORY =
47 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
48 static constexpr auto DYNAMIC_MEMORY = HOST_MEMORY | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
49
50 std::optional preferred_type = FindMemoryType(properties, DYNAMIC_MEMORY);
51 if (!preferred_type) {
52 preferred_type = FindMemoryType(properties, HOST_MEMORY);
53 ASSERT_MSG(preferred_type, "No host visible and coherent memory type found");
54 }
55 return preferred_type.value_or(0);
56}
57
41} // Anonymous namespace 58} // Anonymous namespace
42 59
43VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, 60VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
@@ -51,7 +68,7 @@ VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
51VKStreamBuffer::~VKStreamBuffer() = default; 68VKStreamBuffer::~VKStreamBuffer() = default;
52 69
53std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { 70std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
54 ASSERT(size <= STREAM_BUFFER_SIZE); 71 ASSERT(size <= stream_buffer_size);
55 mapped_size = size; 72 mapped_size = size;
56 73
57 if (alignment > 0) { 74 if (alignment > 0) {
@@ -61,7 +78,7 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
61 WaitPendingOperations(offset); 78 WaitPendingOperations(offset);
62 79
63 bool invalidated = false; 80 bool invalidated = false;
64 if (offset + size > STREAM_BUFFER_SIZE) { 81 if (offset + size > stream_buffer_size) {
65 // The buffer would overflow, save the amount of used watches and reset the state. 82 // The buffer would overflow, save the amount of used watches and reset the state.
66 invalidation_mark = current_watch_cursor; 83 invalidation_mark = current_watch_cursor;
67 current_watch_cursor = 0; 84 current_watch_cursor = 0;
@@ -98,40 +115,37 @@ void VKStreamBuffer::Unmap(u64 size) {
98} 115}
99 116
100void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { 117void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
118 const auto memory_properties = device.GetPhysical().GetMemoryProperties();
119 const u32 preferred_type = GetMemoryType(memory_properties);
120 const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex;
121
 122 // Subtract from the preferred heap size some bytes to avoid getting out of memory.
123 const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
124 const VkDeviceSize allocable_size = heap_size - 4 * 1024 * 1024;
125
101 VkBufferCreateInfo buffer_ci; 126 VkBufferCreateInfo buffer_ci;
102 buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; 127 buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
103 buffer_ci.pNext = nullptr; 128 buffer_ci.pNext = nullptr;
104 buffer_ci.flags = 0; 129 buffer_ci.flags = 0;
105 buffer_ci.size = STREAM_BUFFER_SIZE; 130 buffer_ci.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size);
106 buffer_ci.usage = usage; 131 buffer_ci.usage = usage;
107 buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; 132 buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
108 buffer_ci.queueFamilyIndexCount = 0; 133 buffer_ci.queueFamilyIndexCount = 0;
109 buffer_ci.pQueueFamilyIndices = nullptr; 134 buffer_ci.pQueueFamilyIndices = nullptr;
110 135
111 const auto& dev = device.GetLogical(); 136 buffer = device.GetLogical().CreateBuffer(buffer_ci);
112 buffer = dev.CreateBuffer(buffer_ci); 137
113 138 const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer);
114 const auto& dld = device.GetDispatchLoader(); 139 const u32 required_flags = requirements.memoryTypeBits;
115 const auto requirements = dev.GetBufferMemoryRequirements(*buffer); 140 stream_buffer_size = static_cast<u64>(requirements.size);
116 // Prefer device local host visible allocations (this should hit AMD's pinned memory). 141
117 auto type =
118 FindMemoryType(device, requirements.memoryTypeBits,
119 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
120 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
121 if (!type) {
122 // Otherwise search for a host visible allocation.
123 type = FindMemoryType(device, requirements.memoryTypeBits,
124 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
125 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
126 ASSERT_MSG(type, "No host visible and coherent memory type found");
127 }
128 VkMemoryAllocateInfo memory_ai; 142 VkMemoryAllocateInfo memory_ai;
129 memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; 143 memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
130 memory_ai.pNext = nullptr; 144 memory_ai.pNext = nullptr;
131 memory_ai.allocationSize = requirements.size; 145 memory_ai.allocationSize = requirements.size;
132 memory_ai.memoryTypeIndex = *type; 146 memory_ai.memoryTypeIndex = GetMemoryType(memory_properties, required_flags);
133 147
134 memory = dev.AllocateMemory(memory_ai); 148 memory = device.GetLogical().AllocateMemory(memory_ai);
135 buffer.BindMemory(*memory, 0); 149 buffer.BindMemory(*memory, 0);
136} 150}
137 151
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 58ce8b973..dfddf7ad6 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -56,8 +56,9 @@ private:
56 const VKDevice& device; ///< Vulkan device manager. 56 const VKDevice& device; ///< Vulkan device manager.
57 VKScheduler& scheduler; ///< Command scheduler. 57 VKScheduler& scheduler; ///< Command scheduler.
58 58
59 vk::Buffer buffer; ///< Mapped buffer. 59 vk::Buffer buffer; ///< Mapped buffer.
60 vk::DeviceMemory memory; ///< Memory allocation. 60 vk::DeviceMemory memory; ///< Memory allocation.
61 u64 stream_buffer_size{}; ///< Stream buffer size.
61 62
62 u64 offset{}; ///< Buffer iterator. 63 u64 offset{}; ///< Buffer iterator.
63 u64 mapped_size{}; ///< Size reserved for the current copy. 64 u64 mapped_size{}; ///< Size reserved for the current copy.