-rw-r--r--  src/audio_core/cubeb_sink.cpp                          |  15
-rw-r--r--  src/audio_core/cubeb_sink.h                            |   4
-rw-r--r--  src/core/memory.cpp                                    |  17
-rw-r--r--  src/video_core/CMakeLists.txt                          |   4
-rw-r--r--  src/video_core/engines/maxwell_3d.cpp                  |  66
-rw-r--r--  src/video_core/engines/maxwell_3d.h                    |  15
-rw-r--r--  src/video_core/rasterizer_cache.h                      |  18
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp       |  26
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 128
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer_cache.h   |  83
-rw-r--r--  src/video_core/renderer_vulkan/vk_memory_manager.cpp   |   2
-rw-r--r--  src/video_core/renderer_vulkan/vk_resource_manager.cpp |  12
-rw-r--r--  src/video_core/renderer_vulkan/vk_resource_manager.h   |   2
-rw-r--r--  src/video_core/renderer_vulkan/vk_stream_buffer.cpp    |  90
-rw-r--r--  src/video_core/renderer_vulkan/vk_stream_buffer.h      |  72
15 files changed, 469 insertions(+), 85 deletions(-)
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index dc45dedd3..1da0b9f2a 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -12,6 +12,10 @@
 #include "common/ring_buffer.h"
 #include "core/settings.h"
 
+#ifdef _MSC_VER
+#include <objbase.h>
+#endif
+
 namespace AudioCore {
 
 class CubebSinkStream final : public SinkStream {
@@ -108,6 +112,11 @@ private:
 };
 
 CubebSink::CubebSink(std::string_view target_device_name) {
+    // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
+#ifdef _MSC_VER
+    com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
+#endif
+
     if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) {
         LOG_CRITICAL(Audio_Sink, "cubeb_init failed");
         return;
@@ -142,6 +151,12 @@ CubebSink::~CubebSink() {
     }
 
     cubeb_destroy(ctx);
+
+#ifdef _MSC_VER
+    if (SUCCEEDED(com_init_result)) {
+        CoUninitialize();
+    }
+#endif
 }
 
 SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels,
diff --git a/src/audio_core/cubeb_sink.h b/src/audio_core/cubeb_sink.h
index efb9d1634..511df7bb1 100644
--- a/src/audio_core/cubeb_sink.h
+++ b/src/audio_core/cubeb_sink.h
@@ -25,6 +25,10 @@ private:
     cubeb* ctx{};
     cubeb_devid output_device{};
     std::vector<SinkStreamPtr> sink_streams;
+
+#ifdef _MSC_VER
+    u32 com_init_result = 0;
+#endif
 };
 
 std::vector<std::string> ListCubebSinkDevices();
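
Note on the COM pairing above: CoInitializeEx must be balanced by CoUninitialize
only when it returned a success code (S_OK, or S_FALSE when the thread was already
initialized); RPC_E_CHANGED_MODE is a failure and must not be balanced, which is
what the SUCCEEDED(com_init_result) guard in the destructor enforces. A minimal
standalone sketch of the same pairing as an RAII guard (illustrative only, not part
of the patch):

    #ifdef _WIN32
    #include <objbase.h>

    class ComGuard {
    public:
        ComGuard() : hr{CoInitializeEx(nullptr, COINIT_MULTITHREADED)} {}
        ~ComGuard() {
            // Balance only a successful initialization; RPC_E_CHANGED_MODE fails
            // SUCCEEDED() and must not be paired with CoUninitialize.
            if (SUCCEEDED(hr)) {
                CoUninitialize();
            }
        }
        ComGuard(const ComGuard&) = delete;
        ComGuard& operator=(const ComGuard&) = delete;

    private:
        const HRESULT hr;
    };
    #endif
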
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index e9166dbd9..f809567b6 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -71,15 +71,20 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
                                  FlushMode::FlushAndInvalidate);
 
     VAddr end = base + size;
-    while (base != end) {
-        ASSERT_MSG(base < page_table.pointers.size(), "out of range mapping at {:016X}", base);
+    ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
+               base + page_table.pointers.size());
 
-        page_table.attributes[base] = type;
-        page_table.pointers[base] = memory;
+    std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type);
 
-        base += 1;
-        if (memory != nullptr)
+    if (memory == nullptr) {
+        std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory);
+    } else {
+        while (base != end) {
+            page_table.pointers[base] = memory;
+
+            base += 1;
             memory += PAGE_SIZE;
+        }
     }
 }
 
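
Note on the MapPages change above: attributes for the whole mapped range now get a
single bulk std::fill, and the per-page loop survives only for the pointers array,
where each page maps a different host address (a null backing pointer can still be
bulk-filled). A standalone sketch of the idea with hypothetical stand-in types:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    enum class PageType : std::uint8_t { Unmapped, Memory };

    int main() {
        std::vector<PageType> attributes(1 << 20, PageType::Unmapped);
        const std::size_t base = 0x100;
        const std::size_t size = 0x40; // pages, not bytes

        // One bulk write replaces the old page-by-page loop over attributes.
        std::fill(attributes.begin() + base, attributes.begin() + base + size,
                  PageType::Memory);
        return 0;
    }
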
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6036d6ed3..60529323e 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -111,7 +111,9 @@ if (ENABLE_VULKAN)
         renderer_vulkan/vk_resource_manager.cpp
         renderer_vulkan/vk_resource_manager.h
         renderer_vulkan/vk_scheduler.cpp
-        renderer_vulkan/vk_scheduler.h)
+        renderer_vulkan/vk_scheduler.h
+        renderer_vulkan/vk_stream_buffer.cpp
+        renderer_vulkan/vk_stream_buffer.h)
 
     target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
     target_compile_definitions(video_core PRIVATE HAS_VULKAN)
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2d2136067..144e7fa82 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -107,21 +107,23 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
 void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
     auto debug_context = system.GetGPUDebugContext();
 
+    const u32 method = method_call.method;
+
     // It is an error to write to a register other than the current macro's ARG register before it
     // has finished execution.
     if (executing_macro != 0) {
-        ASSERT(method_call.method == executing_macro + 1);
+        ASSERT(method == executing_macro + 1);
     }
 
     // Methods after 0xE00 are special, they're actually triggers for some microcode that was
     // uploaded to the GPU during initialization.
-    if (method_call.method >= MacroRegistersStart) {
+    if (method >= MacroRegistersStart) {
         // We're trying to execute a macro
         if (executing_macro == 0) {
             // A macro call must begin by writing the macro method's register, not its argument.
-            ASSERT_MSG((method_call.method % 2) == 0,
+            ASSERT_MSG((method % 2) == 0,
                        "Can't start macro execution by writing to the ARGS register");
-            executing_macro = method_call.method;
+            executing_macro = method;
         }
 
         macro_params.push_back(method_call.argument);
@@ -133,66 +135,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
         return;
     }
 
-    ASSERT_MSG(method_call.method < Regs::NUM_REGS,
+    ASSERT_MSG(method < Regs::NUM_REGS,
                "Invalid Maxwell3D register, increase the size of the Regs structure");
 
     if (debug_context) {
         debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
     }
 
-    if (regs.reg_array[method_call.method] != method_call.argument) {
-        regs.reg_array[method_call.method] = method_call.argument;
+    if (regs.reg_array[method] != method_call.argument) {
+        regs.reg_array[method] = method_call.argument;
         // Color buffers
         constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
         constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
-        if (method_call.method >= first_rt_reg &&
-            method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
-            const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt;
-            dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index);
+        if (method >= first_rt_reg &&
+            method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
+            const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
+            dirty_flags.color_buffer.set(rt_index);
         }
 
         // Zeta buffer
         constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
-        if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) ||
-            method_call.method == MAXWELL3D_REG_INDEX(zeta_width) ||
-            method_call.method == MAXWELL3D_REG_INDEX(zeta_height) ||
-            (method_call.method >= MAXWELL3D_REG_INDEX(zeta) &&
-             method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
+        if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
+            method == MAXWELL3D_REG_INDEX(zeta_width) ||
+            method == MAXWELL3D_REG_INDEX(zeta_height) ||
+            (method >= MAXWELL3D_REG_INDEX(zeta) &&
+             method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
             dirty_flags.zeta_buffer = true;
         }
 
         // Shader
         constexpr u32 shader_registers_count =
             sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
-        if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
-            method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
+        if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
+            method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
             dirty_flags.shaders = true;
         }
 
         // Vertex format
-        if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
-            method_call.method <
-                MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
+        if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
+            method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
             dirty_flags.vertex_attrib_format = true;
         }
 
         // Vertex buffer
-        if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) &&
-            method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
-            dirty_flags.vertex_array |=
-                1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
-        } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
-                   method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
-            dirty_flags.vertex_array |=
-                1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
-        } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
-                   method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
-            dirty_flags.vertex_array |=
-                1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays));
+        if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
+            method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) {
+            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
+        } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
+                   method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) {
+            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
+        } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
+                   method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) {
+            dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
         }
     }
 
-    switch (method_call.method) {
+    switch (method) {
     case MAXWELL3D_REG_INDEX(macros.data): {
         ProcessMacroUpload(method_call.argument);
         break;
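
Note on the index math above: each entry of regs.vertex_array spans 4 u32 registers
and each entry of vertex_array_limit spans 2, so a write at register index 'method'
maps to stream (method - base) >> 2 and (method - base) >> 1 respectively, while
instanced_arrays is one register per stream. A standalone sketch with an
illustrative base index (the real values come from MAXWELL3D_REG_INDEX):

    #include <bitset>
    #include <cstdint>
    #include <iostream>

    int main() {
        constexpr std::uint32_t vertex_array_base = 0x700; // illustrative only
        std::bitset<32> dirty_vertex_array;

        // A write to any of the 4 registers of stream 7 dirties bit 7.
        const std::uint32_t method = vertex_array_base + 4 * 7 + 2;
        dirty_vertex_array.set((method - vertex_array_base) >> 2);

        std::cout << "stream 7 dirty: " << dirty_vertex_array[7] << '\n'; // prints 1
        return 0;
    }
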
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 584f51c48..7fbf1026e 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -5,8 +5,10 @@
 #pragma once
 
 #include <array>
+#include <bitset>
 #include <unordered_map>
 #include <vector>
+
 #include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
@@ -1094,19 +1096,18 @@ public:
     MemoryManager& memory_manager;
 
     struct DirtyFlags {
-        u8 color_buffer = 0xFF;
-        bool zeta_buffer = true;
-
-        bool shaders = true;
+        std::bitset<8> color_buffer{0xFF};
+        std::bitset<32> vertex_array{0xFFFFFFFF};
 
         bool vertex_attrib_format = true;
-        u32 vertex_array = 0xFFFFFFFF;
+        bool zeta_buffer = true;
+        bool shaders = true;
 
         void OnMemoryWrite() {
-            color_buffer = 0xFF;
             zeta_buffer = true;
             shaders = true;
-            vertex_array = 0xFFFFFFFF;
+            color_buffer.set();
+            vertex_array.set();
         }
     };
 
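
Note on the std::bitset migration above: the bitset replaces manual shift-and-mask
bookkeeping with named operations, mapping one-to-one onto the old u8/u32 code. A
standalone sketch of the correspondences the patch relies on:

    #include <bitset>
    #include <cassert>

    int main() {
        std::bitset<32> vertex_array{0xFFFFFFFF}; // everything starts dirty

        vertex_array.reset();        // was: vertex_array = 0
        assert(vertex_array.none()); // was: vertex_array == 0

        vertex_array.set(5);         // was: vertex_array |= 1u << 5
        assert(vertex_array[5]);     // was: vertex_array & (1u << 5)

        vertex_array.set();          // was: vertex_array = 0xFFFFFFFF
        return 0;
    }
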
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index bcf0c15a4..a7bcf26fb 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -129,6 +129,15 @@ protected:
         return ++modified_ticks;
     }
 
+    /// Flushes the specified object, updating appropriate cache state as needed
+    void FlushObject(const T& object) {
+        if (!object->IsDirty()) {
+            return;
+        }
+        object->Flush();
+        object->MarkAsModified(false, *this);
+    }
+
 private:
     /// Returns a list of cached objects from the specified memory region, ordered by access time
     std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
@@ -154,15 +163,6 @@ private:
         return objects;
     }
 
-    /// Flushes the specified object, updating appropriate cache state as needed
-    void FlushObject(const T& object) {
-        if (!object->IsDirty()) {
-            return;
-        }
-        object->Flush();
-        object->MarkAsModified(false, *this);
-    }
-
     using ObjectSet = std::set<T>;
     using ObjectCache = std::unordered_map<VAddr, T>;
     using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
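
Note on moving FlushObject from private to protected: the body is unchanged; the
point of the move is visibility, so that derived caches can flush entries directly
(PartialReinterpretSurface in gl_rasterizer_cache.cpp below does exactly this). A
minimal sketch of why the access level matters, with hypothetical stand-in types:

    #include <memory>

    struct Surface {
        bool IsDirty() const { return dirty; }
        void Flush() { dirty = false; }
        bool dirty = true;
    };
    using SurfacePtr = std::shared_ptr<Surface>;

    template <class T>
    class RasterizerCache {
    protected:
        void FlushObject(const T& object) { // protected: visible to subclasses
            if (object->IsDirty()) {
                object->Flush();
            }
        }
    };

    class OpenGLLikeCache : public RasterizerCache<SurfacePtr> {
    public:
        void ResolveCollision(const SurfacePtr& surface) {
            FlushObject(surface); // would not compile while FlushObject was private
        }
    };

    int main() {
        OpenGLLikeCache cache;
        cache.ResolveCollision(std::make_shared<Surface>());
        return 0;
    }
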
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 09fa01d25..c8c1d6911 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -102,8 +102,8 @@ struct FramebufferCacheKey {
 
 RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
                                    ScreenInfo& info)
-    : res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info},
-      buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
+    : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, emu_window{window},
+      screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
     // Create sampler objects
     for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
         texture_samplers[i].Create();
@@ -200,7 +200,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
     }
 
     // Rebinding the VAO invalidates the vertex buffer bindings.
-    gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
+    gpu.dirty_flags.vertex_array.set();
 
     state.draw.vertex_array = vao_entry.handle;
     return vao_entry.handle;
@@ -210,14 +210,14 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
     auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
     const auto& regs = gpu.regs;
 
-    if (!gpu.dirty_flags.vertex_array)
+    if (gpu.dirty_flags.vertex_array.none())
         return;
 
     MICROPROFILE_SCOPE(OpenGL_VB);
 
     // Upload all guest vertex arrays sequentially to our buffer
     for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
-        if (~gpu.dirty_flags.vertex_array & (1u << index))
+        if (!gpu.dirty_flags.vertex_array[index])
             continue;
 
         const auto& vertex_array = regs.vertex_array[index];
@@ -244,7 +244,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
         }
     }
 
-    gpu.dirty_flags.vertex_array = 0;
+    gpu.dirty_flags.vertex_array.reset();
 }
 
 DrawParameters RasterizerOpenGL::SetupDraw() {
@@ -488,13 +488,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
     OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
     std::optional<std::size_t> single_color_target) {
     MICROPROFILE_SCOPE(OpenGL_Framebuffer);
-    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
     const auto& regs = gpu.regs;
 
     const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
                                                  single_color_target};
-    if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 &&
-        !gpu.dirty_flags.zeta_buffer) {
+    if (fb_config_state == current_framebuffer_config_state &&
+        gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
         // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
         // single color targets). This is done because the guest registers may not change but the
         // host framebuffer may contain different attachments
@@ -721,10 +721,10 @@ void RasterizerOpenGL::DrawArrays() {
     // Add space for at least 18 constant buffers
     buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
 
-    bool invalidate = buffer_cache.Map(buffer_size);
+    const bool invalidate = buffer_cache.Map(buffer_size);
     if (invalidate) {
         // As all cached buffers are invalidated, we need to recheck their state.
-        gpu.dirty_flags.vertex_array = 0xFFFFFFFF;
+        gpu.dirty_flags.vertex_array.set();
     }
 
     const GLuint vao = SetupVertexFormat();
@@ -738,9 +738,13 @@ void RasterizerOpenGL::DrawArrays() {
     shader_program_manager->ApplyTo(state);
     state.Apply();
 
+    res_cache.SignalPreDrawCall();
+
     // Execute draw call
     params.DispatchDraw();
 
+    res_cache.SignalPostDrawCall();
+
     // Disable scissor test
     state.viewports[0].scissor.enabled = false;
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 49c79811d..5fdf1164d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <optional>
 #include <glad/glad.h>
 
 #include "common/alignment.h"
@@ -549,6 +550,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
     // alternatives. This signals a bug on those functions.
     const auto width = static_cast<GLsizei>(params.MipWidth(0));
     const auto height = static_cast<GLsizei>(params.MipHeight(0));
+    memory_size = params.MemorySize();
+    reinterpreted = false;
 
     const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
     gl_internal_format = format_tuple.internal_format;
@@ -962,30 +965,31 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre
     auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
     const auto& regs{gpu.regs};
 
-    if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) {
+    if (!gpu.dirty_flags.color_buffer[index]) {
         return last_color_buffers[index];
     }
-    gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index));
+    gpu.dirty_flags.color_buffer.reset(index);
 
     ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
 
     if (index >= regs.rt_control.count) {
-        return last_color_buffers[index] = {};
+        return current_color_buffers[index] = {};
     }
 
     if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
-        return last_color_buffers[index] = {};
+        return current_color_buffers[index] = {};
     }
 
     const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
 
-    return last_color_buffers[index] = GetSurface(color_params, preserve_contents);
+    return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
 }
 
 void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
     surface->LoadGLBuffer();
     surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
     surface->MarkAsModified(false, *this);
+    surface->MarkForReload(false);
 }
 
 Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
@@ -997,18 +1001,23 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
     Surface surface{TryGet(params.addr)};
     if (surface) {
         if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
-            // Use the cached surface as-is
+            // Use the cached surface as-is unless it's not synced with memory
+            if (surface->MustReload())
+                LoadSurface(surface);
             return surface;
         } else if (preserve_contents) {
             // If surface parameters changed and we care about keeping the previous data, recreate
             // the surface from the old one
             Surface new_surface{RecreateSurface(surface, params)};
-            Unregister(surface);
+            UnregisterSurface(surface);
             Register(new_surface);
+            if (new_surface->IsUploaded()) {
+                RegisterReinterpretSurface(new_surface);
+            }
             return new_surface;
         } else {
             // Delete the old surface before creating a new one to prevent collisions.
-            Unregister(surface);
+            UnregisterSurface(surface);
         }
     }
 
@@ -1290,4 +1299,107 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params
     return {};
 }
 
+static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params,
+                                            u32 height) {
+    for (u32 i = 0; i < params.max_mip_level; i++) {
+        if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) {
+            return {i};
+        }
+    }
+    return {};
+}
+
+static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) {
+    const std::size_t size = params.LayerMemorySize();
+    VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap);
+    for (u32 i = 0; i < params.depth; i++) {
+        if (start == addr) {
+            return {i};
+        }
+        start += size;
+    }
+    return {};
+}
+
+static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface,
+                                       const Surface blitted_surface) {
+    const auto& dst_params = blitted_surface->GetSurfaceParams();
+    const auto& src_params = render_surface->GetSurfaceParams();
+    const std::size_t src_memory_size = src_params.size_in_bytes;
+    const std::optional<u32> level =
+        TryFindBestMipMap(src_memory_size, dst_params, src_params.height);
+    if (level.has_value()) {
+        if (src_params.width == dst_params.MipWidthGobAligned(*level) &&
+            src_params.height == dst_params.MipHeight(*level) &&
+            src_params.block_height >= dst_params.MipBlockHeight(*level)) {
+            const std::optional<u32> slot =
+                TryFindBestLayer(render_surface->GetAddr(), dst_params, *level);
+            if (slot.has_value()) {
+                glCopyImageSubData(render_surface->Texture().handle,
+                                   SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
+                                   blitted_surface->Texture().handle,
+                                   SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
+                                   dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
+                blitted_surface->MarkAsModified(true, cache);
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
+    const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize();
+    const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize();
+    if (bound2 > bound1)
+        return true;
+    const auto& dst_params = blitted_surface->GetSurfaceParams();
+    const auto& src_params = render_surface->GetSurfaceParams();
+    return (dst_params.component_type != src_params.component_type);
+}
+
+static bool IsReinterpretInvalidSecond(const Surface render_surface,
+                                       const Surface blitted_surface) {
+    const auto& dst_params = blitted_surface->GetSurfaceParams();
+    const auto& src_params = render_surface->GetSurfaceParams();
+    return (dst_params.height > src_params.height && dst_params.width > src_params.width);
+}
+
+bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
+                                                      Surface intersect) {
+    if (IsReinterpretInvalid(triggering_surface, intersect)) {
+        UnregisterSurface(intersect);
+        return false;
+    }
+    if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
+        if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
+            UnregisterSurface(intersect);
+            return false;
+        }
+        FlushObject(intersect);
+        FlushObject(triggering_surface);
+        intersect->MarkForReload(true);
+    }
+    return true;
+}
+
+void RasterizerCacheOpenGL::SignalPreDrawCall() {
+    if (texception && GLAD_GL_ARB_texture_barrier) {
+        glTextureBarrier();
+    }
+    texception = false;
+}
+
+void RasterizerCacheOpenGL::SignalPostDrawCall() {
+    for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
+        if (current_color_buffers[i] != nullptr) {
+            Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr());
+            if (intersect != nullptr) {
+                PartialReinterpretSurface(current_color_buffers[i], intersect);
+                texception = true;
+            }
+        }
+    }
+}
+
 } // namespace OpenGL
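
Note on the copy in LayerFitReinterpretSurface above: glCopyImageSubData (GL 4.3 /
ARB_copy_image) does a raw server-side copy with no format conversion, which is why
the function first proves that one mip layer of the destination matches the source's
footprint exactly; SignalPreDrawCall then issues glTextureBarrier() (GL 4.5 /
ARB_texture_barrier) before the next draw so sampling sees the updated texels.
Annotated form of the call, as a fragment with hypothetical 2D-to-array targets and
names for concreteness:

    // void glCopyImageSubData(srcName, srcTarget, srcLevel, srcX, srcY, srcZ,
    //                         dstName, dstTarget, dstLevel, dstX, dstY, dstZ,
    //                         srcWidth, srcHeight, srcDepth);
    glCopyImageSubData(render_target, GL_TEXTURE_2D, 0,    // whole base mip...
                       0, 0, 0,                            // ...from the origin
                       array_texture, GL_TEXTURE_2D_ARRAY, // into the array texture
                       level,                              // at the matched mip
                       0, 0, slot,                         // Z offset selects the layer
                       width, height, 1);                  // extent of one layer
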
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 838554c35..797bbdc9c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -34,6 +34,7 @@ using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
 using SurfaceType = VideoCore::Surface::SurfaceType;
 using PixelFormat = VideoCore::Surface::PixelFormat;
 using ComponentType = VideoCore::Surface::ComponentType;
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 struct SurfaceParams {
     enum class SurfaceClass {
@@ -140,10 +141,18 @@ struct SurfaceParams {
         return offset;
     }
 
+    std::size_t GetMipmapSingleSize(u32 mip_level) const {
+        return InnerMipmapMemorySize(mip_level, false, is_layered);
+    }
+
     u32 MipWidth(u32 mip_level) const {
         return std::max(1U, width >> mip_level);
     }
 
+    u32 MipWidthGobAligned(u32 mip_level) const {
+        return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
+    }
+
     u32 MipHeight(u32 mip_level) const {
         return std::max(1U, height >> mip_level);
     }
@@ -346,6 +355,10 @@ public:
         return cached_size_in_bytes;
     }
 
+    std::size_t GetMemorySize() const {
+        return memory_size;
+    }
+
     void Flush() override {
         FlushGLBuffer();
     }
@@ -395,6 +408,26 @@ public:
                      Tegra::Texture::SwizzleSource swizzle_z,
                      Tegra::Texture::SwizzleSource swizzle_w);
 
+    void MarkReinterpreted() {
+        reinterpreted = true;
+    }
+
+    bool IsReinterpreted() const {
+        return reinterpreted;
+    }
+
+    void MarkForReload(bool reload) {
+        must_reload = reload;
+    }
+
+    bool MustReload() const {
+        return must_reload;
+    }
+
+    bool IsUploaded() const {
+        return params.identity == SurfaceParams::SurfaceClass::Uploaded;
+    }
+
 private:
     void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
 
@@ -408,6 +441,9 @@ private:
     GLenum gl_internal_format{};
     std::size_t cached_size_in_bytes{};
     std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
+    std::size_t memory_size;
+    bool reinterpreted = false;
+    bool must_reload = false;
 };
 
 class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -433,6 +469,9 @@ public:
                      const Common::Rectangle<u32>& src_rect,
                      const Common::Rectangle<u32>& dst_rect);
 
+    void SignalPreDrawCall();
+    void SignalPostDrawCall();
+
 private:
     void LoadSurface(const Surface& surface);
     Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true);
@@ -449,6 +488,10 @@ private:
     /// Tries to get a reserved surface for the specified parameters
     Surface TryGetReservedSurface(const SurfaceParams& params);
 
+    // Partially reinterpret a surface based on a triggering_surface that collides with it.
+    // Returns true if the reinterpret was successful, false in case it was not.
+    bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
+
     /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
     void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
     void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
@@ -465,12 +508,50 @@ private:
     OGLFramebuffer read_framebuffer;
     OGLFramebuffer draw_framebuffer;
 
+    bool texception = false;
+
     /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
     /// using the new format.
     OGLBuffer copy_pbo;
 
-    std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers;
+    std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
+    std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
     Surface last_depth_buffer;
+
+    using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>;
+    using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
+
+    static auto GetReinterpretInterval(const Surface& object) {
+        return SurfaceInterval::right_open(object->GetAddr() + 1,
+                                           object->GetAddr() + object->GetMemorySize() - 1);
+    }
+
+    // Reinterpreted surfaces are very fragile as the game may keep rendering into them.
+    SurfaceIntervalCache reinterpreted_surfaces;
+
+    void RegisterReinterpretSurface(Surface reinterpret_surface) {
+        auto interval = GetReinterpretInterval(reinterpret_surface);
+        reinterpreted_surfaces.insert({interval, reinterpret_surface});
+        reinterpret_surface->MarkReinterpreted();
+    }
+
+    Surface CollideOnReinterpretedSurface(VAddr addr) const {
+        const SurfaceInterval interval{addr};
+        for (auto& pair :
+             boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
+            return pair.second;
+        }
+        return nullptr;
+    }
+
+    /// Unregisters an object from the cache
+    void UnregisterSurface(const Surface& object) {
+        if (object->IsReinterpreted()) {
+            auto interval = GetReinterpretInterval(object);
+            reinterpreted_surfaces.erase(interval);
+        }
+        Unregister(object);
+    }
 };
 
 } // namespace OpenGL
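
Note on the interval bookkeeping above: GetReinterpretInterval trims the footprint
to (addr, addr + size - 1) so surfaces that merely touch at their boundaries do not
register as collisions, and CollideOnReinterpretedSurface is a point query into the
interval map. A standalone sketch with integers standing in for VAddr/Surface
(assumes Boost.ICL is available):

    #include <cstdint>
    #include <iostream>
    #include <boost/icl/interval_map.hpp>
    #include <boost/range/iterator_range.hpp>

    int main() {
        using Addr = std::uint64_t;
        using Cache = boost::icl::interval_map<Addr, int>; // int stands in for Surface
        using Interval = Cache::interval_type;

        Cache reinterpreted_surfaces;
        // A "surface" occupying [0x1000, 0x2000) registers the trimmed interval.
        reinterpreted_surfaces.insert({Interval::right_open(0x1000 + 1, 0x2000 - 1), 42});

        // Point query: does this render-target address fall inside any entry?
        const Interval probe{0x1800};
        for (const auto& pair :
             boost::make_iterator_range(reinterpreted_surfaces.equal_range(probe))) {
            std::cout << "collision with surface " << pair.second << '\n';
        }
        return 0;
    }
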
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
index 17ee93b91..0451babbf 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -238,7 +238,7 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
 
 VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory,
                                        u8* data, u64 begin, u64 end)
-    : allocation{allocation}, memory{memory}, data{data}, interval(std::make_pair(begin, end)) {}
+    : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {}
 
 VKMemoryCommitImpl::~VKMemoryCommitImpl() {
     allocation->Free(this);
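
Note on the initializer-list reorder above: C++ initializes non-static members in
declaration order regardless of the order written in the constructor's initializer
list, and compilers flag the mismatch (-Wreorder). The patch only reorders the list
to match the declarations. A minimal illustration with hypothetical members:

    struct Commit {
        Commit(int begin, int end, int memory_arg)
            : interval{begin + end}, memory{memory_arg} {} // matches declaration order

        int interval; // always initialized first (declared first)
        int memory;   // always initialized second
    };
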
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
index 1678463c7..a1e117443 100644
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -125,11 +125,12 @@ void VKFence::Protect(VKResource* resource) {
     protected_resources.push_back(resource);
 }
 
-void VKFence::Unprotect(const VKResource* resource) {
+void VKFence::Unprotect(VKResource* resource) {
     const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
-    if (it != protected_resources.end()) {
-        protected_resources.erase(it);
-    }
+    ASSERT(it != protected_resources.end());
+
+    resource->OnFenceRemoval(this);
+    protected_resources.erase(it);
 }
 
 VKFenceWatch::VKFenceWatch() = default;
@@ -141,12 +142,11 @@ VKFenceWatch::~VKFenceWatch() {
 }
 
 void VKFenceWatch::Wait() {
-    if (!fence) {
+    if (fence == nullptr) {
         return;
     }
     fence->Wait();
     fence->Unprotect(this);
-    fence = nullptr;
 }
 
 void VKFenceWatch::Watch(VKFence& new_fence) {
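
Note on the Unprotect change above: clearing the watch's fence pointer moves out of
VKFenceWatch::Wait and into the fence itself, which now notifies the resource
through OnFenceRemoval before erasing it; a missing registration becomes a hard
ASSERT instead of being silently ignored. A standalone sketch of the resulting flow
with hypothetical minimal types:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    class Fence;

    class Resource {
    public:
        virtual ~Resource() = default;
        virtual void OnFenceRemoval(Fence* signaling_fence) = 0;
    };

    class Fence {
    public:
        void Protect(Resource* resource) { protected_resources.push_back(resource); }

        void Unprotect(Resource* resource) {
            const auto it =
                std::find(protected_resources.begin(), protected_resources.end(), resource);
            assert(it != protected_resources.end());
            resource->OnFenceRemoval(this); // notify first...
            protected_resources.erase(it);  // ...then drop the registration
        }

    private:
        std::vector<Resource*> protected_resources;
    };

    class FenceWatch : public Resource {
    public:
        void Watch(Fence& new_fence) {
            fence = &new_fence;
            fence->Protect(this);
        }
        void Wait() {
            if (fence == nullptr) {
                return;
            }
            fence->Unprotect(this); // 'fence' is cleared via OnFenceRemoval
        }
        void OnFenceRemoval(Fence*) override { fence = nullptr; }

    private:
        Fence* fence = nullptr;
    };

    int main() {
        Fence fence;
        FenceWatch watch;
        watch.Watch(fence);
        watch.Wait(); // a second Wait() would early-return: fence is now null
        return 0;
    }
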
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h
index 5018dfa44..5bfe4cead 100644
--- a/src/video_core/renderer_vulkan/vk_resource_manager.h
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.h
@@ -63,7 +63,7 @@ public:
     void Protect(VKResource* resource);
 
     /// Removes protection for a resource.
-    void Unprotect(const VKResource* resource);
+    void Unprotect(VKResource* resource);
 
     /// Retreives the fence.
     operator vk::Fence() const {
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
new file mode 100644
index 000000000..58ffa42f2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -0,0 +1,90 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <optional>
+#include <vector>
+
+#include "common/assert.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
+
+namespace Vulkan {
+
+constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
+constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
+
+VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
+                               VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
+                               vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage)
+    : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{
+                                                                                   pipeline_stage} {
+    CreateBuffers(memory_manager, usage);
+    ReserveWatches(WATCHES_INITIAL_RESERVE);
+}
+
+VKStreamBuffer::~VKStreamBuffer() = default;
+
+std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
+    ASSERT(size <= buffer_size);
+    mapped_size = size;
+
+    if (offset + size > buffer_size) {
+        // The buffer would overflow, save the amount of used buffers, signal an invalidation and
+        // reset the state.
+        invalidation_mark = used_watches;
+        used_watches = 0;
+        offset = 0;
+    }
+
+    return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
+}
+
+VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
+    ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
+
+    if (invalidation_mark) {
+        // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
+        exctx = scheduler.Flush();
+        std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
+                      [&](auto& resource) { resource->Wait(); });
+        invalidation_mark = std::nullopt;
+    }
+
+    if (used_watches + 1 >= watches.size()) {
+        // Ensure that there are enough watches.
+        ReserveWatches(WATCHES_RESERVE_CHUNK);
+    }
+    // Add a watch for this allocation.
+    watches[used_watches++]->Watch(exctx.GetFence());
+
+    offset += size;
+
+    return exctx;
+}
+
+void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
+    const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0,
+                                         nullptr);
+
+    const auto dev = device.GetLogical();
+    const auto& dld = device.GetDispatchLoader();
+    buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
+    commit = memory_manager.Commit(*buffer, true);
+    mapped_pointer = commit->GetData();
+}
+
+void VKStreamBuffer::ReserveWatches(std::size_t grow_size) {
+    const std::size_t previous_size = watches.size();
+    watches.resize(previous_size + grow_size);
+    std::generate(watches.begin() + previous_size, watches.end(),
+                  []() { return std::make_unique<VKFenceWatch>(); });
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
new file mode 100644
index 000000000..69d036ccd
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -0,0 +1,72 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <tuple>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFence;
+class VKFenceWatch;
+class VKResourceManager;
+class VKScheduler;
+
+class VKStreamBuffer {
+public:
+    explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager,
+                            VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage,
+                            vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage);
+    ~VKStreamBuffer();
+
+    /**
+     * Reserves a region of memory from the stream buffer.
+     * @param size Size to reserve.
+     * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer
+     * offset and a boolean that's true when buffer has been invalidated.
+     */
+    std::tuple<u8*, u64, bool> Reserve(u64 size);
+
+    /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
+    [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size);
+
+    vk::Buffer GetBuffer() const {
+        return *buffer;
+    }
+
+private:
+    /// Creates Vulkan buffer handles committing the required memory.
+    void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage);
+
+    /// Increases the amount of watches available.
+    void ReserveWatches(std::size_t grow_size);
+
+    const VKDevice& device;                      ///< Vulkan device manager.
+    VKScheduler& scheduler;                      ///< Command scheduler.
+    const u64 buffer_size;                       ///< Total size of the stream buffer.
+    const vk::AccessFlags access;                ///< Access usage of this stream buffer.
+    const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
+
+    UniqueBuffer buffer;   ///< Mapped buffer.
+    VKMemoryCommit commit; ///< Memory commit.
+    u8* mapped_pointer{};  ///< Pointer to the host visible commit.
+
+    u64 offset{};      ///< Buffer iterator.
+    u64 mapped_size{}; ///< Size reserved for the current copy.
+
+    std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches.
+    std::size_t used_watches{}; ///< Count of watches, reset on invalidation.
+    std::optional<std::size_t>
+        invalidation_mark{}; ///< Number of watches used in the current invalidation.
+};
+
+} // namespace Vulkan
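
Note on using the new stream buffer: the intended per-upload flow is Reserve (get a
mapped pointer and an offset into the ring), write through the pointer, then Send
(fence a watch over the region so it is not reused while the GPU reads it). A
hypothetical caller sketch; the Vertex type and surrounding objects are assumed, and
the returned buffer_offset would be used when binding the buffer range:

    VKExecutionContext UploadVertices(VKStreamBuffer& stream_buffer,
                                      VKExecutionContext exctx,
                                      const std::vector<Vertex>& vertices) {
        const u64 size = static_cast<u64>(vertices.size() * sizeof(Vertex));

        // Reserve host-visible space; 'invalidated' warns that every previous
        // upload to this ring may be gone and cached state must be rebuilt.
        const auto [pointer, buffer_offset, invalidated] = stream_buffer.Reserve(size);

        std::memcpy(pointer, vertices.data(), size);

        // Publish the region; the fence watch keeps it alive until the GPU is done.
        return stream_buffer.Send(exctx, size);
    }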