diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/audio_core/cubeb_sink.cpp | 15 | ||||
| -rw-r--r-- | src/audio_core/cubeb_sink.h | 4 | ||||
| -rw-r--r-- | src/core/memory.cpp | 17 | ||||
| -rw-r--r-- | src/video_core/CMakeLists.txt | 4 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 66 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 15 | ||||
| -rw-r--r-- | src/video_core/rasterizer_cache.h | 18 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 26 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 128 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 83 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_memory_manager.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_resource_manager.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_resource_manager.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_stream_buffer.cpp | 90 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_stream_buffer.h | 72 |
15 files changed, 469 insertions, 85 deletions
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp index dc45dedd3..1da0b9f2a 100644 --- a/src/audio_core/cubeb_sink.cpp +++ b/src/audio_core/cubeb_sink.cpp | |||
| @@ -12,6 +12,10 @@ | |||
| 12 | #include "common/ring_buffer.h" | 12 | #include "common/ring_buffer.h" |
| 13 | #include "core/settings.h" | 13 | #include "core/settings.h" |
| 14 | 14 | ||
| 15 | #ifdef _MSC_VER | ||
| 16 | #include <objbase.h> | ||
| 17 | #endif | ||
| 18 | |||
| 15 | namespace AudioCore { | 19 | namespace AudioCore { |
| 16 | 20 | ||
| 17 | class CubebSinkStream final : public SinkStream { | 21 | class CubebSinkStream final : public SinkStream { |
| @@ -108,6 +112,11 @@ private: | |||
| 108 | }; | 112 | }; |
| 109 | 113 | ||
| 110 | CubebSink::CubebSink(std::string_view target_device_name) { | 114 | CubebSink::CubebSink(std::string_view target_device_name) { |
| 115 | // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows | ||
| 116 | #ifdef _MSC_VER | ||
| 117 | com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED); | ||
| 118 | #endif | ||
| 119 | |||
| 111 | if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) { | 120 | if (cubeb_init(&ctx, "yuzu", nullptr) != CUBEB_OK) { |
| 112 | LOG_CRITICAL(Audio_Sink, "cubeb_init failed"); | 121 | LOG_CRITICAL(Audio_Sink, "cubeb_init failed"); |
| 113 | return; | 122 | return; |
| @@ -142,6 +151,12 @@ CubebSink::~CubebSink() { | |||
| 142 | } | 151 | } |
| 143 | 152 | ||
| 144 | cubeb_destroy(ctx); | 153 | cubeb_destroy(ctx); |
| 154 | |||
| 155 | #ifdef _MSC_VER | ||
| 156 | if (SUCCEEDED(com_init_result)) { | ||
| 157 | CoUninitialize(); | ||
| 158 | } | ||
| 159 | #endif | ||
| 145 | } | 160 | } |
| 146 | 161 | ||
| 147 | SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels, | 162 | SinkStream& CubebSink::AcquireSinkStream(u32 sample_rate, u32 num_channels, |
diff --git a/src/audio_core/cubeb_sink.h b/src/audio_core/cubeb_sink.h index efb9d1634..511df7bb1 100644 --- a/src/audio_core/cubeb_sink.h +++ b/src/audio_core/cubeb_sink.h | |||
| @@ -25,6 +25,10 @@ private: | |||
| 25 | cubeb* ctx{}; | 25 | cubeb* ctx{}; |
| 26 | cubeb_devid output_device{}; | 26 | cubeb_devid output_device{}; |
| 27 | std::vector<SinkStreamPtr> sink_streams; | 27 | std::vector<SinkStreamPtr> sink_streams; |
| 28 | |||
| 29 | #ifdef _MSC_VER | ||
| 30 | u32 com_init_result = 0; | ||
| 31 | #endif | ||
| 28 | }; | 32 | }; |
| 29 | 33 | ||
| 30 | std::vector<std::string> ListCubebSinkDevices(); | 34 | std::vector<std::string> ListCubebSinkDevices(); |
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index e9166dbd9..f809567b6 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -71,15 +71,20 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa | |||
| 71 | FlushMode::FlushAndInvalidate); | 71 | FlushMode::FlushAndInvalidate); |
| 72 | 72 | ||
| 73 | VAddr end = base + size; | 73 | VAddr end = base + size; |
| 74 | while (base != end) { | 74 | ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", |
| 75 | ASSERT_MSG(base < page_table.pointers.size(), "out of range mapping at {:016X}", base); | 75 | base + page_table.pointers.size()); |
| 76 | 76 | ||
| 77 | page_table.attributes[base] = type; | 77 | std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type); |
| 78 | page_table.pointers[base] = memory; | ||
| 79 | 78 | ||
| 80 | base += 1; | 79 | if (memory == nullptr) { |
| 81 | if (memory != nullptr) | 80 | std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory); |
| 81 | } else { | ||
| 82 | while (base != end) { | ||
| 83 | page_table.pointers[base] = memory; | ||
| 84 | |||
| 85 | base += 1; | ||
| 82 | memory += PAGE_SIZE; | 86 | memory += PAGE_SIZE; |
| 87 | } | ||
| 83 | } | 88 | } |
| 84 | } | 89 | } |
| 85 | 90 | ||
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6036d6ed3..60529323e 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -111,7 +111,9 @@ if (ENABLE_VULKAN) | |||
| 111 | renderer_vulkan/vk_resource_manager.cpp | 111 | renderer_vulkan/vk_resource_manager.cpp |
| 112 | renderer_vulkan/vk_resource_manager.h | 112 | renderer_vulkan/vk_resource_manager.h |
| 113 | renderer_vulkan/vk_scheduler.cpp | 113 | renderer_vulkan/vk_scheduler.cpp |
| 114 | renderer_vulkan/vk_scheduler.h) | 114 | renderer_vulkan/vk_scheduler.h |
| 115 | renderer_vulkan/vk_stream_buffer.cpp | ||
| 116 | renderer_vulkan/vk_stream_buffer.h) | ||
| 115 | 117 | ||
| 116 | target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include) | 118 | target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include) |
| 117 | target_compile_definitions(video_core PRIVATE HAS_VULKAN) | 119 | target_compile_definitions(video_core PRIVATE HAS_VULKAN) |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2d2136067..144e7fa82 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -107,21 +107,23 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { | |||
| 107 | void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | 107 | void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { |
| 108 | auto debug_context = system.GetGPUDebugContext(); | 108 | auto debug_context = system.GetGPUDebugContext(); |
| 109 | 109 | ||
| 110 | const u32 method = method_call.method; | ||
| 111 | |||
| 110 | // It is an error to write to a register other than the current macro's ARG register before it | 112 | // It is an error to write to a register other than the current macro's ARG register before it |
| 111 | // has finished execution. | 113 | // has finished execution. |
| 112 | if (executing_macro != 0) { | 114 | if (executing_macro != 0) { |
| 113 | ASSERT(method_call.method == executing_macro + 1); | 115 | ASSERT(method == executing_macro + 1); |
| 114 | } | 116 | } |
| 115 | 117 | ||
| 116 | // Methods after 0xE00 are special, they're actually triggers for some microcode that was | 118 | // Methods after 0xE00 are special, they're actually triggers for some microcode that was |
| 117 | // uploaded to the GPU during initialization. | 119 | // uploaded to the GPU during initialization. |
| 118 | if (method_call.method >= MacroRegistersStart) { | 120 | if (method >= MacroRegistersStart) { |
| 119 | // We're trying to execute a macro | 121 | // We're trying to execute a macro |
| 120 | if (executing_macro == 0) { | 122 | if (executing_macro == 0) { |
| 121 | // A macro call must begin by writing the macro method's register, not its argument. | 123 | // A macro call must begin by writing the macro method's register, not its argument. |
| 122 | ASSERT_MSG((method_call.method % 2) == 0, | 124 | ASSERT_MSG((method % 2) == 0, |
| 123 | "Can't start macro execution by writing to the ARGS register"); | 125 | "Can't start macro execution by writing to the ARGS register"); |
| 124 | executing_macro = method_call.method; | 126 | executing_macro = method; |
| 125 | } | 127 | } |
| 126 | 128 | ||
| 127 | macro_params.push_back(method_call.argument); | 129 | macro_params.push_back(method_call.argument); |
| @@ -133,66 +135,62 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { | |||
| 133 | return; | 135 | return; |
| 134 | } | 136 | } |
| 135 | 137 | ||
| 136 | ASSERT_MSG(method_call.method < Regs::NUM_REGS, | 138 | ASSERT_MSG(method < Regs::NUM_REGS, |
| 137 | "Invalid Maxwell3D register, increase the size of the Regs structure"); | 139 | "Invalid Maxwell3D register, increase the size of the Regs structure"); |
| 138 | 140 | ||
| 139 | if (debug_context) { | 141 | if (debug_context) { |
| 140 | debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); | 142 | debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); |
| 141 | } | 143 | } |
| 142 | 144 | ||
| 143 | if (regs.reg_array[method_call.method] != method_call.argument) { | 145 | if (regs.reg_array[method] != method_call.argument) { |
| 144 | regs.reg_array[method_call.method] = method_call.argument; | 146 | regs.reg_array[method] = method_call.argument; |
| 145 | // Color buffers | 147 | // Color buffers |
| 146 | constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); | 148 | constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); |
| 147 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); | 149 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); |
| 148 | if (method_call.method >= first_rt_reg && | 150 | if (method >= first_rt_reg && |
| 149 | method_call.method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { | 151 | method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { |
| 150 | const std::size_t rt_index = (method_call.method - first_rt_reg) / registers_per_rt; | 152 | const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt; |
| 151 | dirty_flags.color_buffer |= 1u << static_cast<u32>(rt_index); | 153 | dirty_flags.color_buffer.set(rt_index); |
| 152 | } | 154 | } |
| 153 | 155 | ||
| 154 | // Zeta buffer | 156 | // Zeta buffer |
| 155 | constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); | 157 | constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); |
| 156 | if (method_call.method == MAXWELL3D_REG_INDEX(zeta_enable) || | 158 | if (method == MAXWELL3D_REG_INDEX(zeta_enable) || |
| 157 | method_call.method == MAXWELL3D_REG_INDEX(zeta_width) || | 159 | method == MAXWELL3D_REG_INDEX(zeta_width) || |
| 158 | method_call.method == MAXWELL3D_REG_INDEX(zeta_height) || | 160 | method == MAXWELL3D_REG_INDEX(zeta_height) || |
| 159 | (method_call.method >= MAXWELL3D_REG_INDEX(zeta) && | 161 | (method >= MAXWELL3D_REG_INDEX(zeta) && |
| 160 | method_call.method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { | 162 | method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) { |
| 161 | dirty_flags.zeta_buffer = true; | 163 | dirty_flags.zeta_buffer = true; |
| 162 | } | 164 | } |
| 163 | 165 | ||
| 164 | // Shader | 166 | // Shader |
| 165 | constexpr u32 shader_registers_count = | 167 | constexpr u32 shader_registers_count = |
| 166 | sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); | 168 | sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); |
| 167 | if (method_call.method >= MAXWELL3D_REG_INDEX(shader_config[0]) && | 169 | if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) && |
| 168 | method_call.method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { | 170 | method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) { |
| 169 | dirty_flags.shaders = true; | 171 | dirty_flags.shaders = true; |
| 170 | } | 172 | } |
| 171 | 173 | ||
| 172 | // Vertex format | 174 | // Vertex format |
| 173 | if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && | 175 | if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && |
| 174 | method_call.method < | 176 | method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { |
| 175 | MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { | ||
| 176 | dirty_flags.vertex_attrib_format = true; | 177 | dirty_flags.vertex_attrib_format = true; |
| 177 | } | 178 | } |
| 178 | 179 | ||
| 179 | // Vertex buffer | 180 | // Vertex buffer |
| 180 | if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array) && | 181 | if (method >= MAXWELL3D_REG_INDEX(vertex_array) && |
| 181 | method_call.method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { | 182 | method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * 32) { |
| 182 | dirty_flags.vertex_array |= | 183 | dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); |
| 183 | 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2); | 184 | } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && |
| 184 | } else if (method_call.method >= MAXWELL3D_REG_INDEX(vertex_array_limit) && | 185 | method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { |
| 185 | method_call.method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * 32) { | 186 | dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); |
| 186 | dirty_flags.vertex_array |= | 187 | } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) && |
| 187 | 1u << ((method_call.method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1); | 188 | method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) { |
| 188 | } else if (method_call.method >= MAXWELL3D_REG_INDEX(instanced_arrays) && | 189 | dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays)); |
| 189 | method_call.method < MAXWELL3D_REG_INDEX(instanced_arrays) + 32) { | ||
| 190 | dirty_flags.vertex_array |= | ||
| 191 | 1u << (method_call.method - MAXWELL3D_REG_INDEX(instanced_arrays)); | ||
| 192 | } | 190 | } |
| 193 | } | 191 | } |
| 194 | 192 | ||
| 195 | switch (method_call.method) { | 193 | switch (method) { |
| 196 | case MAXWELL3D_REG_INDEX(macros.data): { | 194 | case MAXWELL3D_REG_INDEX(macros.data): { |
| 197 | ProcessMacroUpload(method_call.argument); | 195 | ProcessMacroUpload(method_call.argument); |
| 198 | break; | 196 | break; |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 584f51c48..7fbf1026e 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -5,8 +5,10 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <bitset> | ||
| 8 | #include <unordered_map> | 9 | #include <unordered_map> |
| 9 | #include <vector> | 10 | #include <vector> |
| 11 | |||
| 10 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 11 | #include "common/bit_field.h" | 13 | #include "common/bit_field.h" |
| 12 | #include "common/common_funcs.h" | 14 | #include "common/common_funcs.h" |
| @@ -1094,19 +1096,18 @@ public: | |||
| 1094 | MemoryManager& memory_manager; | 1096 | MemoryManager& memory_manager; |
| 1095 | 1097 | ||
| 1096 | struct DirtyFlags { | 1098 | struct DirtyFlags { |
| 1097 | u8 color_buffer = 0xFF; | 1099 | std::bitset<8> color_buffer{0xFF}; |
| 1098 | bool zeta_buffer = true; | 1100 | std::bitset<32> vertex_array{0xFFFFFFFF}; |
| 1099 | |||
| 1100 | bool shaders = true; | ||
| 1101 | 1101 | ||
| 1102 | bool vertex_attrib_format = true; | 1102 | bool vertex_attrib_format = true; |
| 1103 | u32 vertex_array = 0xFFFFFFFF; | 1103 | bool zeta_buffer = true; |
| 1104 | bool shaders = true; | ||
| 1104 | 1105 | ||
| 1105 | void OnMemoryWrite() { | 1106 | void OnMemoryWrite() { |
| 1106 | color_buffer = 0xFF; | ||
| 1107 | zeta_buffer = true; | 1107 | zeta_buffer = true; |
| 1108 | shaders = true; | 1108 | shaders = true; |
| 1109 | vertex_array = 0xFFFFFFFF; | 1109 | color_buffer.set(); |
| 1110 | vertex_array.set(); | ||
| 1110 | } | 1111 | } |
| 1111 | }; | 1112 | }; |
| 1112 | 1113 | ||
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index bcf0c15a4..a7bcf26fb 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h | |||
| @@ -129,6 +129,15 @@ protected: | |||
| 129 | return ++modified_ticks; | 129 | return ++modified_ticks; |
| 130 | } | 130 | } |
| 131 | 131 | ||
| 132 | /// Flushes the specified object, updating appropriate cache state as needed | ||
| 133 | void FlushObject(const T& object) { | ||
| 134 | if (!object->IsDirty()) { | ||
| 135 | return; | ||
| 136 | } | ||
| 137 | object->Flush(); | ||
| 138 | object->MarkAsModified(false, *this); | ||
| 139 | } | ||
| 140 | |||
| 132 | private: | 141 | private: |
| 133 | /// Returns a list of cached objects from the specified memory region, ordered by access time | 142 | /// Returns a list of cached objects from the specified memory region, ordered by access time |
| 134 | std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { | 143 | std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { |
| @@ -154,15 +163,6 @@ private: | |||
| 154 | return objects; | 163 | return objects; |
| 155 | } | 164 | } |
| 156 | 165 | ||
| 157 | /// Flushes the specified object, updating appropriate cache state as needed | ||
| 158 | void FlushObject(const T& object) { | ||
| 159 | if (!object->IsDirty()) { | ||
| 160 | return; | ||
| 161 | } | ||
| 162 | object->Flush(); | ||
| 163 | object->MarkAsModified(false, *this); | ||
| 164 | } | ||
| 165 | |||
| 166 | using ObjectSet = std::set<T>; | 166 | using ObjectSet = std::set<T>; |
| 167 | using ObjectCache = std::unordered_map<VAddr, T>; | 167 | using ObjectCache = std::unordered_map<VAddr, T>; |
| 168 | using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; | 168 | using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 09fa01d25..c8c1d6911 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -102,8 +102,8 @@ struct FramebufferCacheKey { | |||
| 102 | 102 | ||
| 103 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system, | 103 | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system, |
| 104 | ScreenInfo& info) | 104 | ScreenInfo& info) |
| 105 | : res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info}, | 105 | : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, emu_window{window}, |
| 106 | buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} { | 106 | screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { |
| 107 | // Create sampler objects | 107 | // Create sampler objects |
| 108 | for (std::size_t i = 0; i < texture_samplers.size(); ++i) { | 108 | for (std::size_t i = 0; i < texture_samplers.size(); ++i) { |
| 109 | texture_samplers[i].Create(); | 109 | texture_samplers[i].Create(); |
| @@ -200,7 +200,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 200 | } | 200 | } |
| 201 | 201 | ||
| 202 | // Rebinding the VAO invalidates the vertex buffer bindings. | 202 | // Rebinding the VAO invalidates the vertex buffer bindings. |
| 203 | gpu.dirty_flags.vertex_array = 0xFFFFFFFF; | 203 | gpu.dirty_flags.vertex_array.set(); |
| 204 | 204 | ||
| 205 | state.draw.vertex_array = vao_entry.handle; | 205 | state.draw.vertex_array = vao_entry.handle; |
| 206 | return vao_entry.handle; | 206 | return vao_entry.handle; |
| @@ -210,14 +210,14 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | |||
| 210 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 210 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 211 | const auto& regs = gpu.regs; | 211 | const auto& regs = gpu.regs; |
| 212 | 212 | ||
| 213 | if (!gpu.dirty_flags.vertex_array) | 213 | if (gpu.dirty_flags.vertex_array.none()) |
| 214 | return; | 214 | return; |
| 215 | 215 | ||
| 216 | MICROPROFILE_SCOPE(OpenGL_VB); | 216 | MICROPROFILE_SCOPE(OpenGL_VB); |
| 217 | 217 | ||
| 218 | // Upload all guest vertex arrays sequentially to our buffer | 218 | // Upload all guest vertex arrays sequentially to our buffer |
| 219 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | 219 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { |
| 220 | if (~gpu.dirty_flags.vertex_array & (1u << index)) | 220 | if (!gpu.dirty_flags.vertex_array[index]) |
| 221 | continue; | 221 | continue; |
| 222 | 222 | ||
| 223 | const auto& vertex_array = regs.vertex_array[index]; | 223 | const auto& vertex_array = regs.vertex_array[index]; |
| @@ -244,7 +244,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | |||
| 244 | } | 244 | } |
| 245 | } | 245 | } |
| 246 | 246 | ||
| 247 | gpu.dirty_flags.vertex_array = 0; | 247 | gpu.dirty_flags.vertex_array.reset(); |
| 248 | } | 248 | } |
| 249 | 249 | ||
| 250 | DrawParameters RasterizerOpenGL::SetupDraw() { | 250 | DrawParameters RasterizerOpenGL::SetupDraw() { |
| @@ -488,13 +488,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 488 | OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, | 488 | OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, |
| 489 | std::optional<std::size_t> single_color_target) { | 489 | std::optional<std::size_t> single_color_target) { |
| 490 | MICROPROFILE_SCOPE(OpenGL_Framebuffer); | 490 | MICROPROFILE_SCOPE(OpenGL_Framebuffer); |
| 491 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 491 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 492 | const auto& regs = gpu.regs; | 492 | const auto& regs = gpu.regs; |
| 493 | 493 | ||
| 494 | const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, | 494 | const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, |
| 495 | single_color_target}; | 495 | single_color_target}; |
| 496 | if (fb_config_state == current_framebuffer_config_state && gpu.dirty_flags.color_buffer == 0 && | 496 | if (fb_config_state == current_framebuffer_config_state && |
| 497 | !gpu.dirty_flags.zeta_buffer) { | 497 | gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) { |
| 498 | // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or | 498 | // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or |
| 499 | // single color targets). This is done because the guest registers may not change but the | 499 | // single color targets). This is done because the guest registers may not change but the |
| 500 | // host framebuffer may contain different attachments | 500 | // host framebuffer may contain different attachments |
| @@ -721,10 +721,10 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 721 | // Add space for at least 18 constant buffers | 721 | // Add space for at least 18 constant buffers |
| 722 | buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); | 722 | buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); |
| 723 | 723 | ||
| 724 | bool invalidate = buffer_cache.Map(buffer_size); | 724 | const bool invalidate = buffer_cache.Map(buffer_size); |
| 725 | if (invalidate) { | 725 | if (invalidate) { |
| 726 | // As all cached buffers are invalidated, we need to recheck their state. | 726 | // As all cached buffers are invalidated, we need to recheck their state. |
| 727 | gpu.dirty_flags.vertex_array = 0xFFFFFFFF; | 727 | gpu.dirty_flags.vertex_array.set(); |
| 728 | } | 728 | } |
| 729 | 729 | ||
| 730 | const GLuint vao = SetupVertexFormat(); | 730 | const GLuint vao = SetupVertexFormat(); |
| @@ -738,9 +738,13 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 738 | shader_program_manager->ApplyTo(state); | 738 | shader_program_manager->ApplyTo(state); |
| 739 | state.Apply(); | 739 | state.Apply(); |
| 740 | 740 | ||
| 741 | res_cache.SignalPreDrawCall(); | ||
| 742 | |||
| 741 | // Execute draw call | 743 | // Execute draw call |
| 742 | params.DispatchDraw(); | 744 | params.DispatchDraw(); |
| 743 | 745 | ||
| 746 | res_cache.SignalPostDrawCall(); | ||
| 747 | |||
| 744 | // Disable scissor test | 748 | // Disable scissor test |
| 745 | state.viewports[0].scissor.enabled = false; | 749 | state.viewports[0].scissor.enabled = false; |
| 746 | 750 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 49c79811d..5fdf1164d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <optional> | ||
| 6 | #include <glad/glad.h> | 7 | #include <glad/glad.h> |
| 7 | 8 | ||
| 8 | #include "common/alignment.h" | 9 | #include "common/alignment.h" |
| @@ -549,6 +550,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params) | |||
| 549 | // alternatives. This signals a bug on those functions. | 550 | // alternatives. This signals a bug on those functions. |
| 550 | const auto width = static_cast<GLsizei>(params.MipWidth(0)); | 551 | const auto width = static_cast<GLsizei>(params.MipWidth(0)); |
| 551 | const auto height = static_cast<GLsizei>(params.MipHeight(0)); | 552 | const auto height = static_cast<GLsizei>(params.MipHeight(0)); |
| 553 | memory_size = params.MemorySize(); | ||
| 554 | reinterpreted = false; | ||
| 552 | 555 | ||
| 553 | const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type); | 556 | const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type); |
| 554 | gl_internal_format = format_tuple.internal_format; | 557 | gl_internal_format = format_tuple.internal_format; |
| @@ -962,30 +965,31 @@ Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool pre | |||
| 962 | auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; | 965 | auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; |
| 963 | const auto& regs{gpu.regs}; | 966 | const auto& regs{gpu.regs}; |
| 964 | 967 | ||
| 965 | if ((gpu.dirty_flags.color_buffer & (1u << static_cast<u32>(index))) == 0) { | 968 | if (!gpu.dirty_flags.color_buffer[index]) { |
| 966 | return last_color_buffers[index]; | 969 | return last_color_buffers[index]; |
| 967 | } | 970 | } |
| 968 | gpu.dirty_flags.color_buffer &= ~(1u << static_cast<u32>(index)); | 971 | gpu.dirty_flags.color_buffer.reset(index); |
| 969 | 972 | ||
| 970 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | 973 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); |
| 971 | 974 | ||
| 972 | if (index >= regs.rt_control.count) { | 975 | if (index >= regs.rt_control.count) { |
| 973 | return last_color_buffers[index] = {}; | 976 | return current_color_buffers[index] = {}; |
| 974 | } | 977 | } |
| 975 | 978 | ||
| 976 | if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { | 979 | if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { |
| 977 | return last_color_buffers[index] = {}; | 980 | return current_color_buffers[index] = {}; |
| 978 | } | 981 | } |
| 979 | 982 | ||
| 980 | const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)}; | 983 | const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)}; |
| 981 | 984 | ||
| 982 | return last_color_buffers[index] = GetSurface(color_params, preserve_contents); | 985 | return current_color_buffers[index] = GetSurface(color_params, preserve_contents); |
| 983 | } | 986 | } |
| 984 | 987 | ||
| 985 | void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { | 988 | void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { |
| 986 | surface->LoadGLBuffer(); | 989 | surface->LoadGLBuffer(); |
| 987 | surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); | 990 | surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); |
| 988 | surface->MarkAsModified(false, *this); | 991 | surface->MarkAsModified(false, *this); |
| 992 | surface->MarkForReload(false); | ||
| 989 | } | 993 | } |
| 990 | 994 | ||
| 991 | Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { | 995 | Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { |
| @@ -997,18 +1001,23 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres | |||
| 997 | Surface surface{TryGet(params.addr)}; | 1001 | Surface surface{TryGet(params.addr)}; |
| 998 | if (surface) { | 1002 | if (surface) { |
| 999 | if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { | 1003 | if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { |
| 1000 | // Use the cached surface as-is | 1004 | // Use the cached surface as-is unless it's not synced with memory |
| 1005 | if (surface->MustReload()) | ||
| 1006 | LoadSurface(surface); | ||
| 1001 | return surface; | 1007 | return surface; |
| 1002 | } else if (preserve_contents) { | 1008 | } else if (preserve_contents) { |
| 1003 | // If surface parameters changed and we care about keeping the previous data, recreate | 1009 | // If surface parameters changed and we care about keeping the previous data, recreate |
| 1004 | // the surface from the old one | 1010 | // the surface from the old one |
| 1005 | Surface new_surface{RecreateSurface(surface, params)}; | 1011 | Surface new_surface{RecreateSurface(surface, params)}; |
| 1006 | Unregister(surface); | 1012 | UnregisterSurface(surface); |
| 1007 | Register(new_surface); | 1013 | Register(new_surface); |
| 1014 | if (new_surface->IsUploaded()) { | ||
| 1015 | RegisterReinterpretSurface(new_surface); | ||
| 1016 | } | ||
| 1008 | return new_surface; | 1017 | return new_surface; |
| 1009 | } else { | 1018 | } else { |
| 1010 | // Delete the old surface before creating a new one to prevent collisions. | 1019 | // Delete the old surface before creating a new one to prevent collisions. |
| 1011 | Unregister(surface); | 1020 | UnregisterSurface(surface); |
| 1012 | } | 1021 | } |
| 1013 | } | 1022 | } |
| 1014 | 1023 | ||
| @@ -1290,4 +1299,107 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params | |||
| 1290 | return {}; | 1299 | return {}; |
| 1291 | } | 1300 | } |
| 1292 | 1301 | ||
| 1302 | static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params, | ||
| 1303 | u32 height) { | ||
| 1304 | for (u32 i = 0; i < params.max_mip_level; i++) { | ||
| 1305 | if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) { | ||
| 1306 | return {i}; | ||
| 1307 | } | ||
| 1308 | } | ||
| 1309 | return {}; | ||
| 1310 | } | ||
| 1311 | |||
| 1312 | static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) { | ||
| 1313 | const std::size_t size = params.LayerMemorySize(); | ||
| 1314 | VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap); | ||
| 1315 | for (u32 i = 0; i < params.depth; i++) { | ||
| 1316 | if (start == addr) { | ||
| 1317 | return {i}; | ||
| 1318 | } | ||
| 1319 | start += size; | ||
| 1320 | } | ||
| 1321 | return {}; | ||
| 1322 | } | ||
| 1323 | |||
| 1324 | static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface, | ||
| 1325 | const Surface blitted_surface) { | ||
| 1326 | const auto& dst_params = blitted_surface->GetSurfaceParams(); | ||
| 1327 | const auto& src_params = render_surface->GetSurfaceParams(); | ||
| 1328 | const std::size_t src_memory_size = src_params.size_in_bytes; | ||
| 1329 | const std::optional<u32> level = | ||
| 1330 | TryFindBestMipMap(src_memory_size, dst_params, src_params.height); | ||
| 1331 | if (level.has_value()) { | ||
| 1332 | if (src_params.width == dst_params.MipWidthGobAligned(*level) && | ||
| 1333 | src_params.height == dst_params.MipHeight(*level) && | ||
| 1334 | src_params.block_height >= dst_params.MipBlockHeight(*level)) { | ||
| 1335 | const std::optional<u32> slot = | ||
| 1336 | TryFindBestLayer(render_surface->GetAddr(), dst_params, *level); | ||
| 1337 | if (slot.has_value()) { | ||
| 1338 | glCopyImageSubData(render_surface->Texture().handle, | ||
| 1339 | SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, | ||
| 1340 | blitted_surface->Texture().handle, | ||
| 1341 | SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot, | ||
| 1342 | dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1); | ||
| 1343 | blitted_surface->MarkAsModified(true, cache); | ||
| 1344 | return true; | ||
| 1345 | } | ||
| 1346 | } | ||
| 1347 | } | ||
| 1348 | return false; | ||
| 1349 | } | ||
| 1350 | |||
| 1351 | static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) { | ||
| 1352 | const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize(); | ||
| 1353 | const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize(); | ||
| 1354 | if (bound2 > bound1) | ||
| 1355 | return true; | ||
| 1356 | const auto& dst_params = blitted_surface->GetSurfaceParams(); | ||
| 1357 | const auto& src_params = render_surface->GetSurfaceParams(); | ||
| 1358 | return (dst_params.component_type != src_params.component_type); | ||
| 1359 | } | ||
| 1360 | |||
| 1361 | static bool IsReinterpretInvalidSecond(const Surface render_surface, | ||
| 1362 | const Surface blitted_surface) { | ||
| 1363 | const auto& dst_params = blitted_surface->GetSurfaceParams(); | ||
| 1364 | const auto& src_params = render_surface->GetSurfaceParams(); | ||
| 1365 | return (dst_params.height > src_params.height && dst_params.width > src_params.width); | ||
| 1366 | } | ||
| 1367 | |||
| 1368 | bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface, | ||
| 1369 | Surface intersect) { | ||
| 1370 | if (IsReinterpretInvalid(triggering_surface, intersect)) { | ||
| 1371 | UnregisterSurface(intersect); | ||
| 1372 | return false; | ||
| 1373 | } | ||
| 1374 | if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) { | ||
| 1375 | if (IsReinterpretInvalidSecond(triggering_surface, intersect)) { | ||
| 1376 | UnregisterSurface(intersect); | ||
| 1377 | return false; | ||
| 1378 | } | ||
| 1379 | FlushObject(intersect); | ||
| 1380 | FlushObject(triggering_surface); | ||
| 1381 | intersect->MarkForReload(true); | ||
| 1382 | } | ||
| 1383 | return true; | ||
| 1384 | } | ||
| 1385 | |||
| 1386 | void RasterizerCacheOpenGL::SignalPreDrawCall() { | ||
| 1387 | if (texception && GLAD_GL_ARB_texture_barrier) { | ||
| 1388 | glTextureBarrier(); | ||
| 1389 | } | ||
| 1390 | texception = false; | ||
| 1391 | } | ||
| 1392 | |||
| 1393 | void RasterizerCacheOpenGL::SignalPostDrawCall() { | ||
| 1394 | for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) { | ||
| 1395 | if (current_color_buffers[i] != nullptr) { | ||
| 1396 | Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr()); | ||
| 1397 | if (intersect != nullptr) { | ||
| 1398 | PartialReinterpretSurface(current_color_buffers[i], intersect); | ||
| 1399 | texception = true; | ||
| 1400 | } | ||
| 1401 | } | ||
| 1402 | } | ||
| 1403 | } | ||
| 1404 | |||
| 1293 | } // namespace OpenGL | 1405 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 838554c35..797bbdc9c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -34,6 +34,7 @@ using SurfaceTarget = VideoCore::Surface::SurfaceTarget; | |||
| 34 | using SurfaceType = VideoCore::Surface::SurfaceType; | 34 | using SurfaceType = VideoCore::Surface::SurfaceType; |
| 35 | using PixelFormat = VideoCore::Surface::PixelFormat; | 35 | using PixelFormat = VideoCore::Surface::PixelFormat; |
| 36 | using ComponentType = VideoCore::Surface::ComponentType; | 36 | using ComponentType = VideoCore::Surface::ComponentType; |
| 37 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 37 | 38 | ||
| 38 | struct SurfaceParams { | 39 | struct SurfaceParams { |
| 39 | enum class SurfaceClass { | 40 | enum class SurfaceClass { |
| @@ -140,10 +141,18 @@ struct SurfaceParams { | |||
| 140 | return offset; | 141 | return offset; |
| 141 | } | 142 | } |
| 142 | 143 | ||
| 144 | std::size_t GetMipmapSingleSize(u32 mip_level) const { | ||
| 145 | return InnerMipmapMemorySize(mip_level, false, is_layered); | ||
| 146 | } | ||
| 147 | |||
| 143 | u32 MipWidth(u32 mip_level) const { | 148 | u32 MipWidth(u32 mip_level) const { |
| 144 | return std::max(1U, width >> mip_level); | 149 | return std::max(1U, width >> mip_level); |
| 145 | } | 150 | } |
| 146 | 151 | ||
| 152 | u32 MipWidthGobAligned(u32 mip_level) const { | ||
| 153 | return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp()); | ||
| 154 | } | ||
| 155 | |||
| 147 | u32 MipHeight(u32 mip_level) const { | 156 | u32 MipHeight(u32 mip_level) const { |
| 148 | return std::max(1U, height >> mip_level); | 157 | return std::max(1U, height >> mip_level); |
| 149 | } | 158 | } |
| @@ -346,6 +355,10 @@ public: | |||
| 346 | return cached_size_in_bytes; | 355 | return cached_size_in_bytes; |
| 347 | } | 356 | } |
| 348 | 357 | ||
| 358 | std::size_t GetMemorySize() const { | ||
| 359 | return memory_size; | ||
| 360 | } | ||
| 361 | |||
| 349 | void Flush() override { | 362 | void Flush() override { |
| 350 | FlushGLBuffer(); | 363 | FlushGLBuffer(); |
| 351 | } | 364 | } |
| @@ -395,6 +408,26 @@ public: | |||
| 395 | Tegra::Texture::SwizzleSource swizzle_z, | 408 | Tegra::Texture::SwizzleSource swizzle_z, |
| 396 | Tegra::Texture::SwizzleSource swizzle_w); | 409 | Tegra::Texture::SwizzleSource swizzle_w); |
| 397 | 410 | ||
| 411 | void MarkReinterpreted() { | ||
| 412 | reinterpreted = true; | ||
| 413 | } | ||
| 414 | |||
| 415 | bool IsReinterpreted() const { | ||
| 416 | return reinterpreted; | ||
| 417 | } | ||
| 418 | |||
| 419 | void MarkForReload(bool reload) { | ||
| 420 | must_reload = reload; | ||
| 421 | } | ||
| 422 | |||
| 423 | bool MustReload() const { | ||
| 424 | return must_reload; | ||
| 425 | } | ||
| 426 | |||
| 427 | bool IsUploaded() const { | ||
| 428 | return params.identity == SurfaceParams::SurfaceClass::Uploaded; | ||
| 429 | } | ||
| 430 | |||
| 398 | private: | 431 | private: |
| 399 | void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); | 432 | void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); |
| 400 | 433 | ||
| @@ -408,6 +441,9 @@ private: | |||
| 408 | GLenum gl_internal_format{}; | 441 | GLenum gl_internal_format{}; |
| 409 | std::size_t cached_size_in_bytes{}; | 442 | std::size_t cached_size_in_bytes{}; |
| 410 | std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA}; | 443 | std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA}; |
| 444 | std::size_t memory_size; | ||
| 445 | bool reinterpreted = false; | ||
| 446 | bool must_reload = false; | ||
| 411 | }; | 447 | }; |
| 412 | 448 | ||
| 413 | class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { | 449 | class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { |
| @@ -433,6 +469,9 @@ public: | |||
| 433 | const Common::Rectangle<u32>& src_rect, | 469 | const Common::Rectangle<u32>& src_rect, |
| 434 | const Common::Rectangle<u32>& dst_rect); | 470 | const Common::Rectangle<u32>& dst_rect); |
| 435 | 471 | ||
| 472 | void SignalPreDrawCall(); | ||
| 473 | void SignalPostDrawCall(); | ||
| 474 | |||
| 436 | private: | 475 | private: |
| 437 | void LoadSurface(const Surface& surface); | 476 | void LoadSurface(const Surface& surface); |
| 438 | Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); | 477 | Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); |
| @@ -449,6 +488,10 @@ private: | |||
| 449 | /// Tries to get a reserved surface for the specified parameters | 488 | /// Tries to get a reserved surface for the specified parameters |
| 450 | Surface TryGetReservedSurface(const SurfaceParams& params); | 489 | Surface TryGetReservedSurface(const SurfaceParams& params); |
| 451 | 490 | ||
| 491 | // Partially reinterpret a surface based on a triggering_surface that collides with it. | ||
| 492 | // returns true if the reinterpret was successful, false in case it was not. | ||
| 493 | bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect); | ||
| 494 | |||
| 452 | /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data | 495 | /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data |
| 453 | void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface); | 496 | void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface); |
| 454 | void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface); | 497 | void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface); |
| @@ -465,12 +508,50 @@ private: | |||
| 465 | OGLFramebuffer read_framebuffer; | 508 | OGLFramebuffer read_framebuffer; |
| 466 | OGLFramebuffer draw_framebuffer; | 509 | OGLFramebuffer draw_framebuffer; |
| 467 | 510 | ||
| 511 | bool texception = false; | ||
| 512 | |||
| 468 | /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one | 513 | /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one |
| 469 | /// using the new format. | 514 | /// using the new format. |
| 470 | OGLBuffer copy_pbo; | 515 | OGLBuffer copy_pbo; |
| 471 | 516 | ||
| 472 | std::array<Surface, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> last_color_buffers; | 517 | std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers; |
| 518 | std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers; | ||
| 473 | Surface last_depth_buffer; | 519 | Surface last_depth_buffer; |
| 520 | |||
| 521 | using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>; | ||
| 522 | using SurfaceInterval = typename SurfaceIntervalCache::interval_type; | ||
| 523 | |||
| 524 | static auto GetReinterpretInterval(const Surface& object) { | ||
| 525 | return SurfaceInterval::right_open(object->GetAddr() + 1, | ||
| 526 | object->GetAddr() + object->GetMemorySize() - 1); | ||
| 527 | } | ||
| 528 | |||
| 529 | // Reinterpreted surfaces are very fragile as the game may keep rendering into them. | ||
| 530 | SurfaceIntervalCache reinterpreted_surfaces; | ||
| 531 | |||
| 532 | void RegisterReinterpretSurface(Surface reinterpret_surface) { | ||
| 533 | auto interval = GetReinterpretInterval(reinterpret_surface); | ||
| 534 | reinterpreted_surfaces.insert({interval, reinterpret_surface}); | ||
| 535 | reinterpret_surface->MarkReinterpreted(); | ||
| 536 | } | ||
| 537 | |||
| 538 | Surface CollideOnReinterpretedSurface(VAddr addr) const { | ||
| 539 | const SurfaceInterval interval{addr}; | ||
| 540 | for (auto& pair : | ||
| 541 | boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) { | ||
| 542 | return pair.second; | ||
| 543 | } | ||
| 544 | return nullptr; | ||
| 545 | } | ||
| 546 | |||
| 547 | /// Unregisters an object from the cache | ||
| 548 | void UnregisterSurface(const Surface& object) { | ||
| 549 | if (object->IsReinterpreted()) { | ||
| 550 | auto interval = GetReinterpretInterval(object); | ||
| 551 | reinterpreted_surfaces.erase(interval); | ||
| 552 | } | ||
| 553 | Unregister(object); | ||
| 554 | } | ||
| 474 | }; | 555 | }; |
| 475 | 556 | ||
| 476 | } // namespace OpenGL | 557 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index 17ee93b91..0451babbf 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp | |||
| @@ -238,7 +238,7 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 | |||
| 238 | 238 | ||
| 239 | VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, | 239 | VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, |
| 240 | u8* data, u64 begin, u64 end) | 240 | u8* data, u64 begin, u64 end) |
| 241 | : allocation{allocation}, memory{memory}, data{data}, interval(std::make_pair(begin, end)) {} | 241 | : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {} |
| 242 | 242 | ||
| 243 | VKMemoryCommitImpl::~VKMemoryCommitImpl() { | 243 | VKMemoryCommitImpl::~VKMemoryCommitImpl() { |
| 244 | allocation->Free(this); | 244 | allocation->Free(this); |
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp index 1678463c7..a1e117443 100644 --- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp | |||
| @@ -125,11 +125,12 @@ void VKFence::Protect(VKResource* resource) { | |||
| 125 | protected_resources.push_back(resource); | 125 | protected_resources.push_back(resource); |
| 126 | } | 126 | } |
| 127 | 127 | ||
| 128 | void VKFence::Unprotect(const VKResource* resource) { | 128 | void VKFence::Unprotect(VKResource* resource) { |
| 129 | const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource); | 129 | const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource); |
| 130 | if (it != protected_resources.end()) { | 130 | ASSERT(it != protected_resources.end()); |
| 131 | protected_resources.erase(it); | 131 | |
| 132 | } | 132 | resource->OnFenceRemoval(this); |
| 133 | protected_resources.erase(it); | ||
| 133 | } | 134 | } |
| 134 | 135 | ||
| 135 | VKFenceWatch::VKFenceWatch() = default; | 136 | VKFenceWatch::VKFenceWatch() = default; |
| @@ -141,12 +142,11 @@ VKFenceWatch::~VKFenceWatch() { | |||
| 141 | } | 142 | } |
| 142 | 143 | ||
| 143 | void VKFenceWatch::Wait() { | 144 | void VKFenceWatch::Wait() { |
| 144 | if (!fence) { | 145 | if (fence == nullptr) { |
| 145 | return; | 146 | return; |
| 146 | } | 147 | } |
| 147 | fence->Wait(); | 148 | fence->Wait(); |
| 148 | fence->Unprotect(this); | 149 | fence->Unprotect(this); |
| 149 | fence = nullptr; | ||
| 150 | } | 150 | } |
| 151 | 151 | ||
| 152 | void VKFenceWatch::Watch(VKFence& new_fence) { | 152 | void VKFenceWatch::Watch(VKFence& new_fence) { |
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h index 5018dfa44..5bfe4cead 100644 --- a/src/video_core/renderer_vulkan/vk_resource_manager.h +++ b/src/video_core/renderer_vulkan/vk_resource_manager.h | |||
| @@ -63,7 +63,7 @@ public: | |||
| 63 | void Protect(VKResource* resource); | 63 | void Protect(VKResource* resource); |
| 64 | 64 | ||
| 65 | /// Removes protection for a resource. | 65 | /// Removes protection for a resource. |
| 66 | void Unprotect(const VKResource* resource); | 66 | void Unprotect(VKResource* resource); |
| 67 | 67 | ||
| 68 | /// Retrieves the fence. | 68 | /// Retrieves the fence. |
| 69 | operator vk::Fence() const { | 69 | operator vk::Fence() const { |
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp new file mode 100644 index 000000000..58ffa42f2 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp | |||
| @@ -0,0 +1,90 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <memory> | ||
| 7 | #include <optional> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/assert.h" | ||
| 11 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 13 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | ||
| 17 | |||
| 18 | namespace Vulkan { | ||
| 19 | |||
| 20 | constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; | ||
| 21 | constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; | ||
| 22 | |||
| 23 | VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, | ||
| 24 | VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, | ||
| 25 | vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage) | ||
| 26 | : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{ | ||
| 27 | pipeline_stage} { | ||
| 28 | CreateBuffers(memory_manager, usage); | ||
| 29 | ReserveWatches(WATCHES_INITIAL_RESERVE); | ||
| 30 | } | ||
| 31 | |||
| 32 | VKStreamBuffer::~VKStreamBuffer() = default; | ||
| 33 | |||
| 34 | std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) { | ||
| 35 | ASSERT(size <= buffer_size); | ||
| 36 | mapped_size = size; | ||
| 37 | |||
| 38 | if (offset + size > buffer_size) { | ||
| 39 | // The buffer would overflow, save the amount of used buffers, signal an invalidation and | ||
| 40 | // reset the state. | ||
| 41 | invalidation_mark = used_watches; | ||
| 42 | used_watches = 0; | ||
| 43 | offset = 0; | ||
| 44 | } | ||
| 45 | |||
| 46 | return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; | ||
| 47 | } | ||
| 48 | |||
| 49 | VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) { | ||
| 50 | ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); | ||
| 51 | |||
| 52 | if (invalidation_mark) { | ||
| 53 | // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. | ||
| 54 | exctx = scheduler.Flush(); | ||
| 55 | std::for_each(watches.begin(), watches.begin() + *invalidation_mark, | ||
| 56 | [&](auto& resource) { resource->Wait(); }); | ||
| 57 | invalidation_mark = std::nullopt; | ||
| 58 | } | ||
| 59 | |||
| 60 | if (used_watches + 1 >= watches.size()) { | ||
| 61 | // Ensure that there are enough watches. | ||
| 62 | ReserveWatches(WATCHES_RESERVE_CHUNK); | ||
| 63 | } | ||
| 64 | // Add a watch for this allocation. | ||
| 65 | watches[used_watches++]->Watch(exctx.GetFence()); | ||
| 66 | |||
| 67 | offset += size; | ||
| 68 | |||
| 69 | return exctx; | ||
| 70 | } | ||
| 71 | |||
| 72 | void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { | ||
| 73 | const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0, | ||
| 74 | nullptr); | ||
| 75 | |||
| 76 | const auto dev = device.GetLogical(); | ||
| 77 | const auto& dld = device.GetDispatchLoader(); | ||
| 78 | buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); | ||
| 79 | commit = memory_manager.Commit(*buffer, true); | ||
| 80 | mapped_pointer = commit->GetData(); | ||
| 81 | } | ||
| 82 | |||
| 83 | void VKStreamBuffer::ReserveWatches(std::size_t grow_size) { | ||
| 84 | const std::size_t previous_size = watches.size(); | ||
| 85 | watches.resize(previous_size + grow_size); | ||
| 86 | std::generate(watches.begin() + previous_size, watches.end(), | ||
| 87 | []() { return std::make_unique<VKFenceWatch>(); }); | ||
| 88 | } | ||
| 89 | |||
| 90 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h new file mode 100644 index 000000000..69d036ccd --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h | |||
| @@ -0,0 +1,72 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <optional> | ||
| 9 | #include <tuple> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | ||
| 15 | |||
| 16 | namespace Vulkan { | ||
| 17 | |||
| 18 | class VKDevice; | ||
| 19 | class VKFence; | ||
| 20 | class VKFenceWatch; | ||
| 21 | class VKResourceManager; | ||
| 22 | class VKScheduler; | ||
| 23 | |||
| 24 | class VKStreamBuffer { | ||
| 25 | public: | ||
| 26 | explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, | ||
| 27 | VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, | ||
| 28 | vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage); | ||
| 29 | ~VKStreamBuffer(); | ||
| 30 | |||
| 31 | /** | ||
| 32 | * Reserves a region of memory from the stream buffer. | ||
| 33 | * @param size Size to reserve. | ||
| 34 | * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer | ||
| 35 | * offset and a boolean that's true when buffer has been invalidated. | ||
| 36 | */ | ||
| 37 | std::tuple<u8*, u64, bool> Reserve(u64 size); | ||
| 38 | |||
| 39 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. | ||
| 40 | [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size); | ||
| 41 | |||
| 42 | vk::Buffer GetBuffer() const { | ||
| 43 | return *buffer; | ||
| 44 | } | ||
| 45 | |||
| 46 | private: | ||
| 47 | /// Creates Vulkan buffer handles committing the required memory. | ||
| 48 | void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage); | ||
| 49 | |||
| 50 | /// Increases the amount of watches available. | ||
| 51 | void ReserveWatches(std::size_t grow_size); | ||
| 52 | |||
| 53 | const VKDevice& device; ///< Vulkan device manager. | ||
| 54 | VKScheduler& scheduler; ///< Command scheduler. | ||
| 55 | const u64 buffer_size; ///< Total size of the stream buffer. | ||
| 56 | const vk::AccessFlags access; ///< Access usage of this stream buffer. | ||
| 57 | const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer. | ||
| 58 | |||
| 59 | UniqueBuffer buffer; ///< Mapped buffer. | ||
| 60 | VKMemoryCommit commit; ///< Memory commit. | ||
| 61 | u8* mapped_pointer{}; ///< Pointer to the host visible commit | ||
| 62 | |||
| 63 | u64 offset{}; ///< Buffer iterator. | ||
| 64 | u64 mapped_size{}; ///< Size reserved for the current copy. | ||
| 65 | |||
| 66 | std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Total watches | ||
| 67 | std::size_t used_watches{}; ///< Count of watches, reset on invalidation. | ||
| 68 | std::optional<std::size_t> | ||
| 69 | invalidation_mark{}; ///< Number of watches used in the current invalidation. | ||
| 70 | }; | ||
| 71 | |||
| 72 | } // namespace Vulkan | ||