diff options
| author | 2018-03-20 21:37:43 -0400 | |
|---|---|---|
| committer | 2018-03-20 21:37:43 -0400 | |
| commit | 0b3ab30762ed09be88e945d27a83bdfe9e77b49c (patch) | |
| tree | 2ce527d9139a25fc1c728a7f4dd2f3baf2df13d1 /src | |
| parent | Merge pull request #256 from mailwl/fatal (diff) | |
| parent | renderer_gl: Port boilerplate rasterizer code over from Citra. (diff) | |
| download | yuzu-0b3ab30762ed09be88e945d27a83bdfe9e77b49c.tar.gz yuzu-0b3ab30762ed09be88e945d27a83bdfe9e77b49c.tar.xz yuzu-0b3ab30762ed09be88e945d27a83bdfe9e77b49c.zip | |
Merge pull request #254 from bunnei/port-citra-renderer
Port Citra OpenGL rasterizer code
Diffstat (limited to 'src')
18 files changed, 2905 insertions, 101 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 2f946e7be..e56253c4c 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -11,13 +11,24 @@ add_library(video_core STATIC | |||
| 11 | gpu.h | 11 | gpu.h |
| 12 | memory_manager.cpp | 12 | memory_manager.cpp |
| 13 | memory_manager.h | 13 | memory_manager.h |
| 14 | rasterizer_interface.h | ||
| 14 | renderer_base.cpp | 15 | renderer_base.cpp |
| 15 | renderer_base.h | 16 | renderer_base.h |
| 17 | renderer_opengl/gl_rasterizer.cpp | ||
| 18 | renderer_opengl/gl_rasterizer.h | ||
| 19 | renderer_opengl/gl_rasterizer_cache.cpp | ||
| 20 | renderer_opengl/gl_rasterizer_cache.h | ||
| 16 | renderer_opengl/gl_resource_manager.h | 21 | renderer_opengl/gl_resource_manager.h |
| 22 | renderer_opengl/gl_shader_decompiler.cpp | ||
| 23 | renderer_opengl/gl_shader_decompiler.h | ||
| 24 | renderer_opengl/gl_shader_gen.cpp | ||
| 25 | renderer_opengl/gl_shader_gen.h | ||
| 17 | renderer_opengl/gl_shader_util.cpp | 26 | renderer_opengl/gl_shader_util.cpp |
| 18 | renderer_opengl/gl_shader_util.h | 27 | renderer_opengl/gl_shader_util.h |
| 19 | renderer_opengl/gl_state.cpp | 28 | renderer_opengl/gl_state.cpp |
| 20 | renderer_opengl/gl_state.h | 29 | renderer_opengl/gl_state.h |
| 30 | renderer_opengl/gl_stream_buffer.cpp | ||
| 31 | renderer_opengl/gl_stream_buffer.h | ||
| 21 | renderer_opengl/renderer_opengl.cpp | 32 | renderer_opengl/renderer_opengl.cpp |
| 22 | renderer_opengl/renderer_opengl.h | 33 | renderer_opengl/renderer_opengl.h |
| 23 | utils.h | 34 | utils.h |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h new file mode 100644 index 000000000..6c7bd0826 --- /dev/null +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -0,0 +1,61 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | struct ScreenInfo; | ||
| 10 | |||
| 11 | namespace VideoCore { | ||
| 12 | |||
| | /// Abstract interface implemented by rasterizer backends. The Accelerate* hooks are optional: | ||
| | /// their default implementations do nothing and return false. | ||
| 13 | class RasterizerInterface { | ||
| 14 | public: | ||
| 15 | virtual ~RasterizerInterface() = default; | ||
| 16 | |||
| 17 | /// Draw the current batch of triangles | ||
| 18 | virtual void DrawTriangles() = 0; | ||
| 19 | |||
| 20 | /// Notify rasterizer that the specified Maxwell register has been changed | ||
| 21 | virtual void NotifyMaxwellRegisterChanged(u32 id) = 0; | ||
| 22 | |||
| 23 | /// Notify rasterizer that all caches should be flushed to guest memory | ||
| 24 | virtual void FlushAll() = 0; | ||
| 25 | |||
| 26 | /// Notify rasterizer that any caches of the specified region should be flushed to guest memory | ||
| 27 | virtual void FlushRegion(PAddr addr, u32 size) = 0; | ||
| 28 | |||
| 29 | /// Notify rasterizer that any caches of the specified region should be invalidated | ||
| 30 | virtual void InvalidateRegion(PAddr addr, u32 size) = 0; | ||
| 31 | |||
| 32 | /// Notify rasterizer that any caches of the specified region should be flushed to guest memory | ||
| 33 | /// and invalidated | ||
| 34 | virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0; | ||
| 35 | |||
| 36 | /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0 | ||
| 37 | virtual bool AccelerateDisplayTransfer(const void* config) { | ||
| 38 | return false; | ||
| 39 | } | ||
| 40 | |||
| 41 | /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 1 | ||
| 42 | virtual bool AccelerateTextureCopy(const void* config) { | ||
| 43 | return false; | ||
| 44 | } | ||
| 45 | |||
| 46 | /// Attempt to use a faster method to fill a region | ||
| 47 | virtual bool AccelerateFill(const void* config) { | ||
| 48 | return false; | ||
| 49 | } | ||
| 50 | |||
| 51 | /// Attempt to use a faster method to display the framebuffer to screen | ||
| 52 | virtual bool AccelerateDisplay(const void* config, PAddr framebuffer_addr, u32 pixel_stride, | ||
| 53 | ScreenInfo& screen_info) { | ||
| 54 | return false; | ||
| 55 | } | ||
| 56 | |||
| | /// Attempt to use a faster method to draw the current batch; is_indexed tells the backend | ||
| | /// whether the batch is an indexed draw. The default implementation returns false. | ||
| 57 | virtual bool AccelerateDrawBatch(bool is_indexed) { | ||
| 58 | return false; | ||
| 59 | } | ||
| 60 | }; | ||
| 61 | } // namespace VideoCore | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp new file mode 100644 index 000000000..24cfff229 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -0,0 +1,269 @@ | |||
| 1 | // Copyright 2015 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <memory> | ||
| 6 | #include <string> | ||
| 7 | #include <tuple> | ||
| 8 | #include <utility> | ||
| 9 | #include <glad/glad.h> | ||
| 10 | #include "common/alignment.h" | ||
| 11 | #include "common/assert.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "common/math_util.h" | ||
| 14 | #include "common/microprofile.h" | ||
| 15 | #include "common/scope_exit.h" | ||
| 16 | #include "common/vector_math.h" | ||
| 17 | #include "core/settings.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_rasterizer.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||
| 20 | #include "video_core/renderer_opengl/renderer_opengl.h" | ||
| 21 | |||
| 22 | using PixelFormat = SurfaceParams::PixelFormat; | ||
| 23 | using SurfaceType = SurfaceParams::SurfaceType; | ||
| 24 | |||
| 25 | MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(128, 128, 192)); | ||
| 26 | MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(128, 128, 192)); | ||
| 27 | MICROPROFILE_DEFINE(OpenGL_FS, "OpenGL", "Fragment Shader Setup", MP_RGB(128, 128, 192)); | ||
| 28 | MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); | ||
| 29 | MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); | ||
| 30 | MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); | ||
| 31 | |||
| 32 | enum class UniformBindings : GLuint { Common, VS, FS }; | ||
| 33 | |||
| | /// Assigns the uniform block called `name` in `shader` to the given binding point. | ||
| | /// Does nothing if the program has no block with that name. Asserts that the block data size | ||
| | /// reported by OpenGL equals `expected_size` (the size of the matching C++ structure). | ||
| 34 | static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding, | ||
| 35 | size_t expected_size) { | ||
| 36 | const GLuint ub_index = glGetUniformBlockIndex(shader, name); | ||
| 37 | if (ub_index != GL_INVALID_INDEX) { | ||
| 38 | GLint ub_size = 0; | ||
| 39 | glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); | ||
| | // Compare in size_t only after ruling out a negative value, to avoid a signed/unsigned mix | ||
| 40 | ASSERT_MSG(ub_size >= 0 && static_cast<size_t>(ub_size) == expected_size, | ||
| 41 | "Uniform block size did not match! Got %d, expected %zu", | ||
| 42 | static_cast<int>(ub_size), expected_size); | ||
| 43 | glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); | ||
| 44 | } | ||
| 45 | } | ||
| 46 | |||
| 47 | static void SetShaderUniformBlockBindings(GLuint shader) { | ||
| 48 | SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common, | ||
| 49 | sizeof(RasterizerOpenGL::UniformData)); | ||
| 50 | SetShaderUniformBlockBinding(shader, "vs_config", UniformBindings::VS, | ||
| 51 | sizeof(RasterizerOpenGL::VSUniformData)); | ||
| 52 | SetShaderUniformBlockBinding(shader, "fs_config", UniformBindings::FS, | ||
| 53 | sizeof(RasterizerOpenGL::FSUniformData)); | ||
| 54 | } | ||
| 55 | |||
| | /// Queries the available OpenGL extensions, creates the vertex/uniform buffers, vertex arrays, | ||
| | /// framebuffer and (when GL_ARB_separate_shader_objects is present) the program pipeline, and | ||
| | /// then syncs the fixed-function state. | ||
| 56 | RasterizerOpenGL::RasterizerOpenGL() { | ||
| 57 | has_ARB_buffer_storage = false; | ||
| 58 | has_ARB_direct_state_access = false; | ||
| 59 | has_ARB_separate_shader_objects = false; | ||
| 60 | has_ARB_vertex_attrib_binding = false; | ||
| 61 | |||
| | // Initialized so the loop below is skipped if the query does not write a value | ||
| 62 | GLint ext_num = 0; | ||
| 63 | glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num); | ||
| 64 | for (GLint i = 0; i < ext_num; i++) { | ||
| 65 | std::string extension{reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))}; | ||
| 66 | |||
| 67 | if (extension == "GL_ARB_buffer_storage") { | ||
| 68 | has_ARB_buffer_storage = true; | ||
| 69 | } else if (extension == "GL_ARB_direct_state_access") { | ||
| 70 | has_ARB_direct_state_access = true; | ||
| 71 | } else if (extension == "GL_ARB_separate_shader_objects") { | ||
| 72 | has_ARB_separate_shader_objects = true; | ||
| 73 | } else if (extension == "GL_ARB_vertex_attrib_binding") { | ||
| 74 | has_ARB_vertex_attrib_binding = true; | ||
| 75 | } | ||
| 76 | } | ||
| 77 | |||
| 78 | // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 | ||
| 79 | state.clip_distance[0] = true; | ||
| 80 | |||
| 81 | // Generate VBO, VAO and UBO | ||
| 82 | vertex_buffer = OGLStreamBuffer::MakeBuffer(GLAD_GL_ARB_buffer_storage, GL_ARRAY_BUFFER); | ||
| 83 | vertex_buffer->Create(VERTEX_BUFFER_SIZE, VERTEX_BUFFER_SIZE / 2); | ||
| 84 | sw_vao.Create(); | ||
| 85 | uniform_buffer.Create(); | ||
| 86 | |||
| 87 | state.draw.vertex_array = sw_vao.handle; | ||
| 88 | state.draw.vertex_buffer = vertex_buffer->GetHandle(); | ||
| 89 | state.draw.uniform_buffer = uniform_buffer.handle; | ||
| 90 | state.Apply(); | ||
| 91 | |||
| | // Use the same binding points that SetShaderUniformBlockBindings assigns to the shader blocks | ||
| 92 | glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), nullptr, GL_STATIC_DRAW); | ||
| 93 | glBindBufferBase(GL_UNIFORM_BUFFER, static_cast<GLuint>(UniformBindings::Common), | ||
| | uniform_buffer.handle); | ||
| 94 | |||
| 95 | uniform_block_data.dirty = true; | ||
| 96 | |||
| 97 | // Create render framebuffer | ||
| 98 | framebuffer.Create(); | ||
| 99 | |||
| 100 | if (has_ARB_separate_shader_objects) { | ||
| 101 | hw_vao.Create(); | ||
| 102 | hw_vao_enabled_attributes.fill(false); | ||
| 103 | |||
| 104 | stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); | ||
| 105 | stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); | ||
| 106 | state.draw.vertex_buffer = stream_buffer->GetHandle(); | ||
| 107 | |||
| 108 | pipeline.Create(); | ||
| 109 | vs_input_index_min = 0; | ||
| 110 | vs_input_index_max = 0; | ||
| | vs_input_size = 0; // previously left uninitialized | ||
| 111 | state.draw.program_pipeline = pipeline.handle; | ||
| 112 | state.draw.shader_program = 0; | ||
| 113 | state.draw.vertex_array = hw_vao.handle; | ||
| 114 | state.Apply(); | ||
| 115 | |||
| 116 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); | ||
| 117 | |||
| 118 | vs_uniform_buffer.Create(); | ||
| 119 | glBindBuffer(GL_UNIFORM_BUFFER, vs_uniform_buffer.handle); | ||
| 120 | glBufferData(GL_UNIFORM_BUFFER, sizeof(VSUniformData), nullptr, GL_STREAM_COPY); | ||
| 121 | glBindBufferBase(GL_UNIFORM_BUFFER, static_cast<GLuint>(UniformBindings::VS), | ||
| | vs_uniform_buffer.handle); | ||
| 122 | } else { | ||
| 123 | UNIMPLEMENTED(); | ||
| 124 | } | ||
| 125 | |||
| 126 | accelerate_draw = AccelDraw::Disabled; | ||
| 127 | |||
| 128 | glEnable(GL_BLEND); | ||
| 129 | |||
| 130 | // Sync fixed function OpenGL state | ||
| 131 | SyncClipEnabled(); | ||
| 132 | SyncClipCoef(); | ||
| 133 | SyncCullMode(); | ||
| 134 | SyncBlendEnabled(); | ||
| 135 | SyncBlendFuncs(); | ||
| 136 | SyncBlendColor(); | ||
| 137 | } | ||
| 138 | |||
| 139 | RasterizerOpenGL::~RasterizerOpenGL() { | ||
| 140 | if (stream_buffer != nullptr) { | ||
| 141 | state.draw.vertex_buffer = stream_buffer->GetHandle(); | ||
| 142 | state.Apply(); | ||
| 143 | stream_buffer->Release(); | ||
| 144 | } | ||
| 145 | } | ||
| 146 | |||
| 147 | static constexpr std::array<GLenum, 4> vs_attrib_types{ | ||
| 148 | GL_BYTE, // VertexAttributeFormat::BYTE | ||
| 149 | GL_UNSIGNED_BYTE, // VertexAttributeFormat::UBYTE | ||
| 150 | GL_SHORT, // VertexAttributeFormat::SHORT | ||
| 151 | GL_FLOAT // VertexAttributeFormat::FLOAT | ||
| 152 | }; | ||
| 153 | |||
| 154 | void RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) { | ||
| 155 | UNIMPLEMENTED(); | ||
| 156 | } | ||
| 157 | |||
| 158 | void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { | ||
| 159 | MICROPROFILE_SCOPE(OpenGL_VAO); | ||
| 160 | UNIMPLEMENTED(); | ||
| 161 | } | ||
| 162 | |||
| 163 | void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset) { | ||
| 164 | MICROPROFILE_SCOPE(OpenGL_VS); | ||
| 165 | UNIMPLEMENTED(); | ||
| 166 | } | ||
| 167 | |||
| 168 | void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) { | ||
| 169 | MICROPROFILE_SCOPE(OpenGL_FS); | ||
| 170 | UNIMPLEMENTED(); | ||
| 171 | } | ||
| 172 | |||
| 173 | bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { | ||
| 174 | if (!has_ARB_separate_shader_objects) { | ||
| 175 | UNIMPLEMENTED(); | ||
| 176 | return false; | ||
| 177 | } | ||
| 178 | |||
| 179 | accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; | ||
| 180 | DrawTriangles(); | ||
| 181 | |||
| 182 | return true; | ||
| 183 | } | ||
| 184 | |||
| 185 | void RasterizerOpenGL::DrawTriangles() { | ||
| 186 | MICROPROFILE_SCOPE(OpenGL_Drawing); | ||
| 187 | UNIMPLEMENTED(); | ||
| 188 | } | ||
| 189 | |||
| 190 | void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 id) {} | ||
| 191 | |||
| 192 | void RasterizerOpenGL::FlushAll() { | ||
| 193 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||
| 194 | res_cache.FlushAll(); | ||
| 195 | } | ||
| 196 | |||
| 197 | void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { | ||
| 198 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||
| 199 | res_cache.FlushRegion(addr, size); | ||
| 200 | } | ||
| 201 | |||
| 202 | void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { | ||
| 203 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||
| 204 | res_cache.InvalidateRegion(addr, size, nullptr); | ||
| 205 | } | ||
| 206 | |||
| 207 | void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) { | ||
| 208 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||
| 209 | res_cache.FlushRegion(addr, size); | ||
| 210 | res_cache.InvalidateRegion(addr, size, nullptr); | ||
| 211 | } | ||
| 212 | |||
| | /// Not implemented yet. Returns false, like the RasterizerInterface default, so that the | ||
| | /// caller is never told a transfer was performed when nothing was done. | ||
| 213 | bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { | ||
| 214 | MICROPROFILE_SCOPE(OpenGL_Blits); | ||
| 215 | UNIMPLEMENTED(); | ||
| 216 | return false; | ||
| 217 | } | ||
| 218 | |||
| | /// Not implemented yet. Returns false, like the RasterizerInterface default, so that the | ||
| | /// caller is never told a copy was performed when nothing was done. | ||
| 219 | bool RasterizerOpenGL::AccelerateTextureCopy(const void* config) { | ||
| 220 | UNIMPLEMENTED(); | ||
| 221 | return false; | ||
| 222 | } | ||
| 223 | |||
| | /// Not implemented yet. Returns false, like the RasterizerInterface default, so that the | ||
| | /// caller is never told a fill was performed when nothing was done. | ||
| 224 | bool RasterizerOpenGL::AccelerateFill(const void* config) { | ||
| 225 | UNIMPLEMENTED(); | ||
| 226 | return false; | ||
| 227 | } | ||
| 228 | |||
| | /// Not implemented yet. Returns false, like the RasterizerInterface default; screen_info is | ||
| | /// left untouched, so reporting success here would be misleading. | ||
| 229 | bool RasterizerOpenGL::AccelerateDisplay(const void* config, PAddr framebuffer_addr, | ||
| 230 | u32 pixel_stride, ScreenInfo& screen_info) { | ||
| 231 | UNIMPLEMENTED(); | ||
| 232 | return false; | ||
| 233 | } | ||
| 234 | |||
| 235 | void RasterizerOpenGL::SetShader() { | ||
| 236 | UNIMPLEMENTED(); | ||
| 237 | } | ||
| 238 | |||
| 239 | void RasterizerOpenGL::SyncClipEnabled() { | ||
| 240 | UNIMPLEMENTED(); | ||
| 241 | } | ||
| 242 | |||
| 243 | void RasterizerOpenGL::SyncClipCoef() { | ||
| 244 | UNIMPLEMENTED(); | ||
| 245 | } | ||
| 246 | |||
| 247 | void RasterizerOpenGL::SyncCullMode() { | ||
| 248 | UNIMPLEMENTED(); | ||
| 249 | } | ||
| 250 | |||
| 251 | void RasterizerOpenGL::SyncDepthScale() { | ||
| 252 | UNIMPLEMENTED(); | ||
| 253 | } | ||
| 254 | |||
| 255 | void RasterizerOpenGL::SyncDepthOffset() { | ||
| 256 | UNIMPLEMENTED(); | ||
| 257 | } | ||
| 258 | |||
| 259 | void RasterizerOpenGL::SyncBlendEnabled() { | ||
| 260 | UNIMPLEMENTED(); | ||
| 261 | } | ||
| 262 | |||
| 263 | void RasterizerOpenGL::SyncBlendFuncs() { | ||
| 264 | UNIMPLEMENTED(); | ||
| 265 | } | ||
| 266 | |||
| 267 | void RasterizerOpenGL::SyncBlendColor() { | ||
| 268 | UNIMPLEMENTED(); | ||
| 269 | } | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h new file mode 100644 index 000000000..893fc530f --- /dev/null +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -0,0 +1,162 @@ | |||
| 1 | // Copyright 2015 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <cstring> | ||
| 10 | #include <memory> | ||
| 11 | #include <unordered_map> | ||
| 12 | #include <vector> | ||
| 13 | #include <glad/glad.h> | ||
| 14 | #include "common/bit_field.h" | ||
| 15 | #include "common/common_types.h" | ||
| 16 | #include "common/hash.h" | ||
| 17 | #include "common/vector_math.h" | ||
| 18 | #include "video_core/rasterizer_interface.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | ||
| 20 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 21 | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_state.h" | ||
| 23 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | ||
| 24 | |||
| 25 | struct ScreenInfo; | ||
| 26 | |||
| 27 | class RasterizerOpenGL : public VideoCore::RasterizerInterface { | ||
| 28 | public: | ||
| 29 | RasterizerOpenGL(); | ||
| 30 | ~RasterizerOpenGL() override; | ||
| 31 | |||
| 32 | void DrawTriangles() override; | ||
| 33 | void NotifyMaxwellRegisterChanged(u32 id) override; | ||
| 34 | void FlushAll() override; | ||
| 35 | void FlushRegion(PAddr addr, u32 size) override; | ||
| 36 | void InvalidateRegion(PAddr addr, u32 size) override; | ||
| 37 | void FlushAndInvalidateRegion(PAddr addr, u32 size) override; | ||
| 38 | bool AccelerateDisplayTransfer(const void* config) override; | ||
| 39 | bool AccelerateTextureCopy(const void* config) override; | ||
| 40 | bool AccelerateFill(const void* config) override; | ||
| 41 | bool AccelerateDisplay(const void* config, PAddr framebuffer_addr, u32 pixel_stride, | ||
| 42 | ScreenInfo& screen_info) override; | ||
| 43 | bool AccelerateDrawBatch(bool is_indexed) override; | ||
| 44 | |||
| 45 | struct VertexShader { | ||
| 46 | OGLShader shader; | ||
| 47 | }; | ||
| 48 | |||
| 49 | struct FragmentShader { | ||
| 50 | OGLShader shader; | ||
| 51 | }; | ||
| 52 | |||
| 53 | /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned | ||
| 54 | // NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at | ||
| 55 | // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. | ||
| 56 | // Not following that rule will cause problems on some AMD drivers. | ||
| 57 | struct UniformData {}; | ||
| 58 | |||
| 59 | // static_assert( | ||
| 60 | // sizeof(UniformData) == 0x460, | ||
| 61 | // "The size of the UniformData structure has changed, update the structure in the shader"); | ||
| 62 | static_assert(sizeof(UniformData) < 16384, | ||
| 63 | "UniformData structure must be less than 16kb as per the OpenGL spec"); | ||
| 64 | |||
| 65 | struct VSUniformData {}; | ||
| 66 | // static_assert( | ||
| 67 | // sizeof(VSUniformData) == 1856, | ||
| 68 | // "The size of the VSUniformData structure has changed, update the structure in the | ||
| 69 | // shader"); | ||
| 70 | static_assert(sizeof(VSUniformData) < 16384, | ||
| 71 | "VSUniformData structure must be less than 16kb as per the OpenGL spec"); | ||
| 72 | |||
| 73 | struct FSUniformData {}; | ||
| 74 | // static_assert( | ||
| 75 | // sizeof(FSUniformData) == 1856, | ||
| 76 | // "The size of the FSUniformData structure has changed, update the structure in the | ||
| 77 | // shader"); | ||
| 78 | static_assert(sizeof(FSUniformData) < 16384, | ||
| 79 | "FSUniformData structure must be less than 16kb as per the OpenGL spec"); | ||
| 80 | |||
| 81 | private: | ||
| 82 | struct SamplerInfo {}; | ||
| 83 | |||
| 84 | /// Syncs the clip enabled status to match the guest state | ||
| 85 | void SyncClipEnabled(); | ||
| 86 | |||
| 87 | /// Syncs the clip coefficients to match the guest state | ||
| 88 | void SyncClipCoef(); | ||
| 89 | |||
| 90 | /// Sets the OpenGL shader in accordance with the current guest state | ||
| 91 | void SetShader(); | ||
| 92 | |||
| 93 | /// Syncs the cull mode to match the guest state | ||
| 94 | void SyncCullMode(); | ||
| 95 | |||
| 96 | /// Syncs the depth scale to match the guest state | ||
| 97 | void SyncDepthScale(); | ||
| 98 | |||
| 99 | /// Syncs the depth offset to match the guest state | ||
| 100 | void SyncDepthOffset(); | ||
| 101 | |||
| 102 | /// Syncs the blend enabled status to match the guest state | ||
| 103 | void SyncBlendEnabled(); | ||
| 104 | |||
| 105 | /// Syncs the blend functions to match the guest state | ||
| 106 | void SyncBlendFuncs(); | ||
| 107 | |||
| 108 | /// Syncs the blend color to match the guest state | ||
| 109 | void SyncBlendColor(); | ||
| 110 | |||
| 111 | bool has_ARB_buffer_storage; | ||
| 112 | bool has_ARB_direct_state_access; | ||
| 113 | bool has_ARB_separate_shader_objects; | ||
| 114 | bool has_ARB_vertex_attrib_binding; | ||
| 115 | |||
| 116 | OpenGLState state; | ||
| 117 | |||
| 118 | RasterizerCacheOpenGL res_cache; | ||
| 119 | |||
| 120 | struct { | ||
| 121 | UniformData data; | ||
| 122 | bool dirty; | ||
| 123 | } uniform_block_data = {}; | ||
| 124 | |||
| 125 | OGLPipeline pipeline; | ||
| 126 | OGLVertexArray sw_vao; | ||
| 127 | OGLVertexArray hw_vao; | ||
| 128 | std::array<bool, 16> hw_vao_enabled_attributes; | ||
| 129 | |||
| 130 | std::array<SamplerInfo, 3> texture_samplers; | ||
| 131 | static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; | ||
| 132 | std::unique_ptr<OGLStreamBuffer> vertex_buffer; | ||
| 133 | OGLBuffer uniform_buffer; | ||
| 134 | OGLFramebuffer framebuffer; | ||
| 135 | |||
| 136 | static constexpr size_t STREAM_BUFFER_SIZE = 4 * 1024 * 1024; | ||
| 137 | std::unique_ptr<OGLStreamBuffer> stream_buffer; | ||
| 138 | |||
| 139 | GLint vs_input_index_min; | ||
| 140 | GLint vs_input_index_max; | ||
| 141 | GLsizeiptr vs_input_size; | ||
| 142 | |||
| 143 | void AnalyzeVertexArray(bool is_indexed); | ||
| 144 | void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset); | ||
| 145 | |||
| 146 | OGLBuffer vs_uniform_buffer; | ||
| 147 | std::unordered_map<GLShader::MaxwellVSConfig, VertexShader*> vs_shader_map; | ||
| 148 | std::unordered_map<std::string, VertexShader> vs_shader_cache; | ||
| 149 | OGLShader vs_default_shader; | ||
| 150 | |||
| 151 | void SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset); | ||
| 152 | |||
| 153 | OGLBuffer fs_uniform_buffer; | ||
| 154 | std::unordered_map<GLShader::MaxwellFSConfig, FragmentShader*> fs_shader_map; | ||
| 155 | std::unordered_map<std::string, FragmentShader> fs_shader_cache; | ||
| 156 | OGLShader fs_default_shader; | ||
| 157 | |||
| 158 | void SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset); | ||
| 159 | |||
| 160 | enum class AccelDraw { Disabled, Arrays, Indexed }; | ||
| 161 | AccelDraw accelerate_draw; | ||
| 162 | }; | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp new file mode 100644 index 000000000..884637ca5 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -0,0 +1,1361 @@ | |||
| 1 | // Copyright 2015 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <atomic> | ||
| 7 | #include <cstring> | ||
| 8 | #include <iterator> | ||
| 9 | #include <memory> | ||
| 10 | #include <unordered_set> | ||
| 11 | #include <utility> | ||
| 12 | #include <vector> | ||
| 13 | #include <boost/optional.hpp> | ||
| 14 | #include <boost/range/iterator_range.hpp> | ||
| 15 | #include <glad/glad.h> | ||
| 16 | #include "common/alignment.h" | ||
| 17 | #include "common/bit_field.h" | ||
| 18 | #include "common/color.h" | ||
| 19 | #include "common/logging/log.h" | ||
| 20 | #include "common/math_util.h" | ||
| 21 | #include "common/microprofile.h" | ||
| 22 | #include "common/scope_exit.h" | ||
| 23 | #include "common/vector_math.h" | ||
| 24 | #include "core/frontend/emu_window.h" | ||
| 25 | #include "core/memory.h" | ||
| 26 | #include "core/settings.h" | ||
| 27 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | ||
| 28 | #include "video_core/renderer_opengl/gl_state.h" | ||
| 29 | #include "video_core/utils.h" | ||
| 30 | #include "video_core/video_core.h" | ||
| 31 | |||
| 32 | using SurfaceType = SurfaceParams::SurfaceType; | ||
| 33 | using PixelFormat = SurfaceParams::PixelFormat; | ||
| 34 | |||
| 35 | struct FormatTuple { | ||
| 36 | GLint internal_format; | ||
| 37 | GLenum format; | ||
| 38 | GLenum type; | ||
| 39 | }; | ||
| 40 | |||
| 41 | static constexpr std::array<FormatTuple, 5> fb_format_tuples = {{ | ||
| 42 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, // RGBA8 | ||
| 43 | {GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE}, // RGB8 | ||
| 44 | {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1 | ||
| 45 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 | ||
| 46 | {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 | ||
| 47 | }}; | ||
| 48 | |||
| 49 | static constexpr std::array<FormatTuple, 4> depth_format_tuples = {{ | ||
| 50 | {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16 | ||
| 51 | {}, | ||
| 52 | {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24 | ||
| 53 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8 | ||
| 54 | }}; | ||
| 55 | |||
| 56 | static constexpr FormatTuple tex_tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; | ||
| 57 | |||
| | /// Returns the OpenGL internal format / format / type triple for a surface pixel format. | ||
| | /// Color formats index fb_format_tuples directly; depth and depth-stencil formats index | ||
| | /// depth_format_tuples relative to D16; any other format gets the generic RGBA8 tex_tuple. | ||
| 58 | static const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { | ||
| 59 | const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); | ||
| 60 | if (type == SurfaceType::Color) { | ||
| 61 | ASSERT(static_cast<size_t>(pixel_format) < fb_format_tuples.size()); | ||
| 62 | return fb_format_tuples[static_cast<size_t>(pixel_format)]; | ||
| 63 | } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { | ||
| | // D16 is the first entry of depth_format_tuples; name it instead of hard-coding its value | ||
| | constexpr size_t first_depth_format = static_cast<size_t>(PixelFormat::D16); | ||
| 64 | const size_t tuple_idx = static_cast<size_t>(pixel_format) - first_depth_format; | ||
| 65 | ASSERT(tuple_idx < depth_format_tuples.size()); | ||
| 66 | return depth_format_tuples[tuple_idx]; | ||
| 67 | } | ||
| 68 | return tex_tuple; | ||
| 69 | } | ||
| 70 | |||
| 71 | template <typename Map, typename Interval> | ||
| 72 | constexpr auto RangeFromInterval(Map& map, const Interval& interval) { | ||
| 73 | return boost::make_iterator_range(map.equal_range(interval)); | ||
| 74 | } | ||
| 75 | |||
| | /// Returns the configured resolution factor, or the emulator window's framebuffer scaling | ||
| | /// ratio when the configured factor is zero. | ||
| 76 | static u16 GetResolutionScaleFactor() { | ||
| 77 | return static_cast<u16>(Settings::values.resolution_factor | ||
| 78 | ? Settings::values.resolution_factor | ||
| 79 | : VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio()); | ||
| 80 | } | ||
| 81 | |||
| | /// Copies one 8x8 pixel tile between a Morton-interleaved tile buffer and a linear GL buffer. | ||
| | /// morton_to_gl selects the direction: true reads tile_buffer and writes gl_buffer, false the | ||
| | /// reverse. stride is the width of a gl_buffer row in pixels. Tile row y lands on GL row 7 - y, | ||
| | /// i.e. the tile is flipped vertically. | ||
| 82 | template <bool morton_to_gl, PixelFormat format> | ||
| 83 | static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) { | ||
| 84 | constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; | ||
| 85 | constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); | ||
| 86 | for (u32 y = 0; y < 8; ++y) { | ||
| 87 | for (u32 x = 0; x < 8; ++x) { | ||
| 88 | u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel; | ||
| 89 | u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * gl_bytes_per_pixel; | ||
| 90 | if (morton_to_gl) { | ||
| 91 | if (format == PixelFormat::D24S8) { | ||
| | // D24S8 is byte-rotated: tile byte 3 becomes GL byte 0, tile bytes 0-2 follow it | ||
| 92 | gl_ptr[0] = tile_ptr[3]; | ||
| 93 | std::memcpy(gl_ptr + 1, tile_ptr, 3); | ||
| 94 | } else { | ||
| 95 | std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel); | ||
| 96 | } | ||
| 97 | } else { | ||
| 98 | if (format == PixelFormat::D24S8) { | ||
| | // Inverse of the rotation above | ||
| 99 | std::memcpy(tile_ptr, gl_ptr + 1, 3); | ||
| 100 | tile_ptr[3] = gl_ptr[0]; | ||
| 101 | } else { | ||
| 102 | std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel); | ||
| 103 | } | ||
| 104 | } | ||
| 105 | } | ||
| 106 | } | ||
| 107 | } | ||
| 108 | |||
| | /// Copies the address range [start, end) of a tiled surface to or from a linear GL buffer, | ||
| | /// one 8x8 tile at a time. | ||
| | /// @param stride    Width of a gl_buffer row in pixels | ||
| | /// @param height    Surface height in pixels, used to locate a tile row inside gl_buffer | ||
| | /// @param gl_buffer Linear buffer holding the whole surface | ||
| | /// @param base      Address that tile alignment and pixel indices are computed relative to | ||
| | /// @param start     First address of the range to copy | ||
| | /// @param end       Address one past the end of the range | ||
| | /// When morton_to_gl is true the range must be tile-aligned (asserted below); partial first | ||
| | /// and last tiles are only handled in the GL-to-tiled direction. | ||
| 109 | template <bool morton_to_gl, PixelFormat format> | ||
| 110 | static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr start, PAddr end) { | ||
| 111 | constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; | ||
| 112 | constexpr u32 tile_size = bytes_per_pixel * 64; | ||
| 113 | |||
| 114 | constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); | ||
| 115 | static_assert(gl_bytes_per_pixel >= bytes_per_pixel, ""); | ||
| | // When a GL pixel is wider than the tiled pixel, skip the extra leading bytes of each pixel | ||
| 116 | gl_buffer += gl_bytes_per_pixel - bytes_per_pixel; | ||
| 117 | |||
| 118 | const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); | ||
| 119 | const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size); | ||
| 120 | const PAddr aligned_end = base + Common::AlignDown(end - base, tile_size); | ||
| 121 | |||
| 122 | ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end)); | ||
| 123 | |||
| | // Pixel coordinates of the tile containing `start` | ||
| 124 | const u64 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel; | ||
| 125 | u32 x = static_cast<u32>((begin_pixel_index % (stride * 8)) / 8); | ||
| 126 | u32 y = static_cast<u32>((begin_pixel_index / (stride * 8)) * 8); | ||
| 127 | |||
| | // Tile rows are laid out bottom-up in gl_buffer: tile row y starts at GL row height - 8 - y | ||
| 128 | gl_buffer += ((height - 8 - y) * stride + x) * gl_bytes_per_pixel; | ||
| 129 | |||
| | // Advances x, y and gl_buffer to the next tile | ||
| 130 | auto glbuf_next_tile = [&] { | ||
| 131 | x = (x + 8) % stride; | ||
| 132 | gl_buffer += 8 * gl_bytes_per_pixel; | ||
| 133 | if (!x) { | ||
| | // Wrapped around: the 8-pixel steps added up to one full row, so stepping back nine | ||
| | // rows leaves gl_buffer eight rows earlier, at the start of the next tile row | ||
| 134 | y += 8; | ||
| 135 | gl_buffer -= stride * 9 * gl_bytes_per_pixel; | ||
| 136 | } | ||
| 137 | }; | ||
| 138 | |||
| 139 | u8* tile_buffer = Memory::GetPhysicalPointer(start); | ||
| 140 | |||
| | // Partial first tile: build the whole tile in a scratch buffer, copy out only the bytes in range | ||
| 141 | if (start < aligned_start && !morton_to_gl) { | ||
| 142 | std::array<u8, tile_size> tmp_buf; | ||
| 143 | MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer); | ||
| 144 | std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start], | ||
| 145 | std::min(aligned_start, end) - start); | ||
| 146 | |||
| 147 | tile_buffer += aligned_start - start; | ||
| 148 | glbuf_next_tile(); | ||
| 149 | } | ||
| 150 | |||
| | // Whole tiles | ||
| 151 | const u8* const buffer_end = tile_buffer + aligned_end - aligned_start; | ||
| 152 | while (tile_buffer < buffer_end) { | ||
| 153 | MortonCopyTile<morton_to_gl, format>(stride, tile_buffer, gl_buffer); | ||
| 154 | tile_buffer += tile_size; | ||
| 155 | glbuf_next_tile(); | ||
| 156 | } | ||
| 157 | |||
| | // Partial last tile: same scratch-buffer approach, copying only the bytes before `end` | ||
| 158 | if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) { | ||
| 159 | std::array<u8, tile_size> tmp_buf; | ||
| 160 | MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer); | ||
| 161 | std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end); | ||
| 162 | } | ||
| 163 | } | ||
| 164 | |||
| 165 | static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> morton_to_gl_fns = { | ||
| 166 | MortonCopy<true, PixelFormat::RGBA8>, // 0 | ||
| 167 | MortonCopy<true, PixelFormat::RGB8>, // 1 | ||
| 168 | MortonCopy<true, PixelFormat::RGB5A1>, // 2 | ||
| 169 | MortonCopy<true, PixelFormat::RGB565>, // 3 | ||
| 170 | MortonCopy<true, PixelFormat::RGBA4>, // 4 | ||
| 171 | nullptr, | ||
| 172 | nullptr, | ||
| 173 | nullptr, | ||
| 174 | nullptr, | ||
| 175 | nullptr, | ||
| 176 | nullptr, | ||
| 177 | nullptr, | ||
| 178 | nullptr, | ||
| 179 | nullptr, // 5 - 13 | ||
| 180 | MortonCopy<true, PixelFormat::D16>, // 14 | ||
| 181 | nullptr, // 15 | ||
| 182 | MortonCopy<true, PixelFormat::D24>, // 16 | ||
| 183 | MortonCopy<true, PixelFormat::D24S8> // 17 | ||
| 184 | }; | ||
| 185 | |||
| 186 | static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> gl_to_morton_fns = { | ||
| 187 | MortonCopy<false, PixelFormat::RGBA8>, // 0 | ||
| 188 | MortonCopy<false, PixelFormat::RGB8>, // 1 | ||
| 189 | MortonCopy<false, PixelFormat::RGB5A1>, // 2 | ||
| 190 | MortonCopy<false, PixelFormat::RGB565>, // 3 | ||
| 191 | MortonCopy<false, PixelFormat::RGBA4>, // 4 | ||
| 192 | nullptr, | ||
| 193 | nullptr, | ||
| 194 | nullptr, | ||
| 195 | nullptr, | ||
| 196 | nullptr, | ||
| 197 | nullptr, | ||
| 198 | nullptr, | ||
| 199 | nullptr, | ||
| 200 | nullptr, // 5 - 13 | ||
| 201 | MortonCopy<false, PixelFormat::D16>, // 14 | ||
| 202 | nullptr, // 15 | ||
| 203 | MortonCopy<false, PixelFormat::D24>, // 16 | ||
| 204 | MortonCopy<false, PixelFormat::D24S8> // 17 | ||
| 205 | }; | ||
| 206 | |||
| 207 | // Allocate an uninitialized texture of appropriate size and format for the surface | ||
| 208 | static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tuple, u32 width, | ||
| 209 | u32 height) { | ||
| 210 | OpenGLState cur_state = OpenGLState::GetCurState(); | ||
| 211 | |||
| 212 | // Keep track of previous texture bindings | ||
| 213 | GLuint old_tex = cur_state.texture_units[0].texture_2d; | ||
| 214 | cur_state.texture_units[0].texture_2d = texture; | ||
| 215 | cur_state.Apply(); | ||
| 216 | glActiveTexture(GL_TEXTURE0); | ||
| 217 | |||
| 218 | glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0, | ||
| 219 | format_tuple.format, format_tuple.type, nullptr); | ||
| 220 | |||
| 221 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); | ||
| 222 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | ||
| 223 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); | ||
| 224 | glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); | ||
| 225 | |||
| 226 | // Restore previous texture bindings | ||
| 227 | cur_state.texture_units[0].texture_2d = old_tex; | ||
| 228 | cur_state.Apply(); | ||
| 229 | } | ||
| 230 | |||
| 231 | static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex, | ||
| 232 | const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type, | ||
| 233 | GLuint read_fb_handle, GLuint draw_fb_handle) { | ||
| 234 | OpenGLState state = OpenGLState::GetCurState(); | ||
| 235 | |||
| 236 | OpenGLState prev_state = state; | ||
| 237 | SCOPE_EXIT({ prev_state.Apply(); }); | ||
| 238 | |||
| 239 | // Make sure textures aren't bound to texture units, since going to bind them to framebuffer | ||
| 240 | // components | ||
| 241 | state.ResetTexture(src_tex); | ||
| 242 | state.ResetTexture(dst_tex); | ||
| 243 | |||
| 244 | state.draw.read_framebuffer = read_fb_handle; | ||
| 245 | state.draw.draw_framebuffer = draw_fb_handle; | ||
| 246 | state.Apply(); | ||
| 247 | |||
| 248 | u32 buffers = 0; | ||
| 249 | |||
| 250 | if (type == SurfaceType::Color || type == SurfaceType::Texture) { | ||
| 251 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, | ||
| 252 | 0); | ||
| 253 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||
| 254 | 0); | ||
| 255 | |||
| 256 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, | ||
| 257 | 0); | ||
| 258 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||
| 259 | 0); | ||
| 260 | |||
| 261 | buffers = GL_COLOR_BUFFER_BIT; | ||
| 262 | } else if (type == SurfaceType::Depth) { | ||
| 263 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 264 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0); | ||
| 265 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 266 | |||
| 267 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 268 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0); | ||
| 269 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 270 | |||
| 271 | buffers = GL_DEPTH_BUFFER_BIT; | ||
| 272 | } else if (type == SurfaceType::DepthStencil) { | ||
| 273 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 274 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||
| 275 | src_tex, 0); | ||
| 276 | |||
| 277 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 278 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||
| 279 | dst_tex, 0); | ||
| 280 | |||
| 281 | buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; | ||
| 282 | } | ||
| 283 | |||
| 284 | glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left, | ||
| 285 | dst_rect.bottom, dst_rect.right, dst_rect.top, buffers, | ||
| 286 | buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); | ||
| 287 | |||
| 288 | return true; | ||
| 289 | } | ||
| 290 | |||
| 291 | static bool FillSurface(const Surface& surface, const u8* fill_data, | ||
| 292 | const MathUtil::Rectangle<u32>& fill_rect, GLuint draw_fb_handle) { | ||
| 293 | UNIMPLEMENTED(); | ||
| 294 | return true; | ||
| 295 | } | ||
| 296 | |||
| 297 | SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { | ||
| 298 | SurfaceParams params = *this; | ||
| 299 | const u32 tiled_size = is_tiled ? 8 : 1; | ||
| 300 | const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size); | ||
| 301 | PAddr aligned_start = | ||
| 302 | addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); | ||
| 303 | PAddr aligned_end = | ||
| 304 | addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); | ||
| 305 | |||
| 306 | if (aligned_end - aligned_start > stride_tiled_bytes) { | ||
| 307 | params.addr = aligned_start; | ||
| 308 | params.height = static_cast<u32>((aligned_end - aligned_start) / BytesInPixels(stride)); | ||
| 309 | } else { | ||
| 310 | // 1 row | ||
| 311 | ASSERT(aligned_end - aligned_start == stride_tiled_bytes); | ||
| 312 | const u64 tiled_alignment = BytesInPixels(is_tiled ? 8 * 8 : 1); | ||
| 313 | aligned_start = | ||
| 314 | addr + Common::AlignDown(boost::icl::first(interval) - addr, tiled_alignment); | ||
| 315 | aligned_end = | ||
| 316 | addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment); | ||
| 317 | params.addr = aligned_start; | ||
| 318 | params.width = static_cast<u32>(PixelsInBytes(aligned_end - aligned_start) / tiled_size); | ||
| 319 | params.stride = params.width; | ||
| 320 | params.height = tiled_size; | ||
| 321 | } | ||
| 322 | params.UpdateParams(); | ||
| 323 | |||
| 324 | return params; | ||
| 325 | } | ||
| 326 | |||
| 327 | SurfaceInterval SurfaceParams::GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const { | ||
| 328 | if (unscaled_rect.GetHeight() == 0 || unscaled_rect.GetWidth() == 0) { | ||
| 329 | return {}; | ||
| 330 | } | ||
| 331 | |||
| 332 | if (is_tiled) { | ||
| 333 | unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8; | ||
| 334 | unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8; | ||
| 335 | unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8) * 8; | ||
| 336 | unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8; | ||
| 337 | } | ||
| 338 | |||
| 339 | const u32 stride_tiled = !is_tiled ? stride : stride * 8; | ||
| 340 | |||
| 341 | const u32 pixel_offset = | ||
| 342 | stride_tiled * (!is_tiled ? unscaled_rect.bottom : (height / 8) - unscaled_rect.top) + | ||
| 343 | unscaled_rect.left; | ||
| 344 | |||
| 345 | const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride_tiled + unscaled_rect.GetWidth(); | ||
| 346 | |||
| 347 | return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)}; | ||
| 348 | } | ||
| 349 | |||
| 350 | MathUtil::Rectangle<u32> SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const { | ||
| 351 | const u32 begin_pixel_index = static_cast<u32>(PixelsInBytes(sub_surface.addr - addr)); | ||
| 352 | |||
| 353 | if (is_tiled) { | ||
| 354 | const int x0 = (begin_pixel_index % (stride * 8)) / 8; | ||
| 355 | const int y0 = (begin_pixel_index / (stride * 8)) * 8; | ||
| 356 | // Top to bottom | ||
| 357 | return MathUtil::Rectangle<u32>(x0, height - y0, x0 + sub_surface.width, | ||
| 358 | height - (y0 + sub_surface.height)); | ||
| 359 | } | ||
| 360 | |||
| 361 | const int x0 = begin_pixel_index % stride; | ||
| 362 | const int y0 = begin_pixel_index / stride; | ||
| 363 | // Bottom to top | ||
| 364 | return MathUtil::Rectangle<u32>(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0); | ||
| 365 | } | ||
| 366 | |||
| 367 | MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const { | ||
| 368 | auto rect = GetSubRect(sub_surface); | ||
| 369 | rect.left = rect.left * res_scale; | ||
| 370 | rect.right = rect.right * res_scale; | ||
| 371 | rect.top = rect.top * res_scale; | ||
| 372 | rect.bottom = rect.bottom * res_scale; | ||
| 373 | return rect; | ||
| 374 | } | ||
| 375 | |||
| 376 | bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { | ||
| 377 | return std::tie(other_surface.addr, other_surface.width, other_surface.height, | ||
| 378 | other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) == | ||
| 379 | std::tie(addr, width, height, stride, pixel_format, is_tiled) && | ||
| 380 | pixel_format != PixelFormat::Invalid; | ||
| 381 | } | ||
| 382 | |||
| 383 | bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { | ||
| 384 | return sub_surface.addr >= addr && sub_surface.end <= end && | ||
| 385 | sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && | ||
| 386 | sub_surface.is_tiled == is_tiled && | ||
| 387 | (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && | ||
| 388 | (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && | ||
| 389 | GetSubRect(sub_surface).left + sub_surface.width <= stride; | ||
| 390 | } | ||
| 391 | |||
| 392 | bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { | ||
| 393 | return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && | ||
| 394 | addr <= expanded_surface.end && expanded_surface.addr <= end && | ||
| 395 | is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride && | ||
| 396 | (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) % | ||
| 397 | BytesInPixels(stride * (is_tiled ? 8 : 1)) == | ||
| 398 | 0; | ||
| 399 | } | ||
| 400 | |||
| 401 | bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { | ||
| 402 | if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr || | ||
| 403 | end < texcopy_params.end) { | ||
| 404 | return false; | ||
| 405 | } | ||
| 406 | if (texcopy_params.width != texcopy_params.stride) { | ||
| 407 | const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1))); | ||
| 408 | return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && | ||
| 409 | texcopy_params.width % BytesInPixels(is_tiled ? 64 : 1) == 0 && | ||
| 410 | (texcopy_params.height == 1 || texcopy_params.stride == tile_stride) && | ||
| 411 | ((texcopy_params.addr - addr) % tile_stride) + texcopy_params.width <= tile_stride; | ||
| 412 | } | ||
| 413 | return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval(); | ||
| 414 | } | ||
| 415 | |||
| 416 | bool CachedSurface::CanFill(const SurfaceParams& dest_surface, | ||
| 417 | SurfaceInterval fill_interval) const { | ||
| 418 | if (type == SurfaceType::Fill && IsRegionValid(fill_interval) && | ||
| 419 | boost::icl::first(fill_interval) >= addr && | ||
| 420 | boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range | ||
| 421 | dest_surface.FromInterval(fill_interval).GetInterval() == | ||
| 422 | fill_interval) { // make sure interval is a rectangle in dest surface | ||
| 423 | if (fill_size * 8 != dest_surface.GetFormatBpp()) { | ||
| 424 | // Check if bits repeat for our fill_size | ||
| 425 | const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / 8, 1u); | ||
| 426 | std::vector<u8> fill_test(fill_size * dest_bytes_per_pixel); | ||
| 427 | |||
| 428 | for (u32 i = 0; i < dest_bytes_per_pixel; ++i) | ||
| 429 | std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); | ||
| 430 | |||
| 431 | for (u32 i = 0; i < fill_size; ++i) | ||
| 432 | if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], | ||
| 433 | dest_bytes_per_pixel) != 0) | ||
| 434 | return false; | ||
| 435 | |||
| 436 | if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) | ||
| 437 | return false; | ||
| 438 | } | ||
| 439 | return true; | ||
| 440 | } | ||
| 441 | return false; | ||
| 442 | } | ||
| 443 | |||
| 444 | bool CachedSurface::CanCopy(const SurfaceParams& dest_surface, | ||
| 445 | SurfaceInterval copy_interval) const { | ||
| 446 | SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); | ||
| 447 | ASSERT(subrect_params.GetInterval() == copy_interval); | ||
| 448 | if (CanSubRect(subrect_params)) | ||
| 449 | return true; | ||
| 450 | |||
| 451 | if (CanFill(dest_surface, copy_interval)) | ||
| 452 | return true; | ||
| 453 | |||
| 454 | return false; | ||
| 455 | } | ||
| 456 | |||
| 457 | SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const { | ||
| 458 | SurfaceInterval result{}; | ||
| 459 | const auto valid_regions = | ||
| 460 | SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions; | ||
| 461 | for (auto& valid_interval : valid_regions) { | ||
| 462 | const SurfaceInterval aligned_interval{ | ||
| 463 | addr + Common::AlignUp(boost::icl::first(valid_interval) - addr, | ||
| 464 | BytesInPixels(is_tiled ? 8 * 8 : 1)), | ||
| 465 | addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr, | ||
| 466 | BytesInPixels(is_tiled ? 8 * 8 : 1))}; | ||
| 467 | |||
| 468 | if (BytesInPixels(is_tiled ? 8 * 8 : 1) > boost::icl::length(valid_interval) || | ||
| 469 | boost::icl::length(aligned_interval) == 0) { | ||
| 470 | continue; | ||
| 471 | } | ||
| 472 | |||
| 473 | // Get the rectangle within aligned_interval | ||
| 474 | const u32 stride_bytes = static_cast<u32>(BytesInPixels(stride)) * (is_tiled ? 8 : 1); | ||
| 475 | SurfaceInterval rect_interval{ | ||
| 476 | addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes), | ||
| 477 | addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes), | ||
| 478 | }; | ||
| 479 | if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) { | ||
| 480 | // 1 row | ||
| 481 | rect_interval = aligned_interval; | ||
| 482 | } else if (boost::icl::length(rect_interval) == 0) { | ||
| 483 | // 2 rows that do not make a rectangle, return the larger one | ||
| 484 | const SurfaceInterval row1{boost::icl::first(aligned_interval), | ||
| 485 | boost::icl::first(rect_interval)}; | ||
| 486 | const SurfaceInterval row2{boost::icl::first(rect_interval), | ||
| 487 | boost::icl::last_next(aligned_interval)}; | ||
| 488 | rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? row1 : row2; | ||
| 489 | } | ||
| 490 | |||
| 491 | if (boost::icl::length(rect_interval) > boost::icl::length(result)) { | ||
| 492 | result = rect_interval; | ||
| 493 | } | ||
| 494 | } | ||
| 495 | return result; | ||
| 496 | } | ||
| 497 | |||
| 498 | void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface, | ||
| 499 | SurfaceInterval copy_interval) { | ||
| 500 | SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval); | ||
| 501 | ASSERT(subrect_params.GetInterval() == copy_interval); | ||
| 502 | |||
| 503 | ASSERT(src_surface != dst_surface); | ||
| 504 | |||
| 505 | // This is only called when CanCopy is true, no need to run checks here | ||
| 506 | if (src_surface->type == SurfaceType::Fill) { | ||
| 507 | // FillSurface needs a 4 bytes buffer | ||
| 508 | const u64 fill_offset = | ||
| 509 | (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size; | ||
| 510 | std::array<u8, 4> fill_buffer; | ||
| 511 | |||
| 512 | u64 fill_buff_pos = fill_offset; | ||
| 513 | for (int i : {0, 1, 2, 3}) | ||
| 514 | fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; | ||
| 515 | |||
| 516 | FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params), | ||
| 517 | draw_framebuffer.handle); | ||
| 518 | return; | ||
| 519 | } | ||
| 520 | if (src_surface->CanSubRect(subrect_params)) { | ||
| 521 | BlitTextures(src_surface->texture.handle, src_surface->GetScaledSubRect(subrect_params), | ||
| 522 | dst_surface->texture.handle, dst_surface->GetScaledSubRect(subrect_params), | ||
| 523 | src_surface->type, read_framebuffer.handle, draw_framebuffer.handle); | ||
| 524 | return; | ||
| 525 | } | ||
| 526 | UNREACHABLE(); | ||
| 527 | } | ||
| 528 | |||
| 529 | MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); | ||
| 530 | void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { | ||
| 531 | ASSERT(type != SurfaceType::Fill); | ||
| 532 | |||
| 533 | const u8* const texture_src_data = Memory::GetPhysicalPointer(addr); | ||
| 534 | if (texture_src_data == nullptr) | ||
| 535 | return; | ||
| 536 | |||
| 537 | if (gl_buffer == nullptr) { | ||
| 538 | gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format); | ||
| 539 | gl_buffer.reset(new u8[gl_buffer_size]); | ||
| 540 | } | ||
| 541 | |||
| 542 | // TODO: Should probably be done in ::Memory:: and check for other regions too | ||
| 543 | if (load_start < Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END) | ||
| 544 | load_end = Memory::VRAM_VADDR_END; | ||
| 545 | |||
| 546 | if (load_start < Memory::VRAM_VADDR && load_end > Memory::VRAM_VADDR) | ||
| 547 | load_start = Memory::VRAM_VADDR; | ||
| 548 | |||
| 549 | MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); | ||
| 550 | |||
| 551 | ASSERT(load_start >= addr && load_end <= end); | ||
| 552 | const u32 start_offset = load_start - addr; | ||
| 553 | |||
| 554 | if (!is_tiled) { | ||
| 555 | ASSERT(type == SurfaceType::Color); | ||
| 556 | std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, | ||
| 557 | load_end - load_start); | ||
| 558 | } else { | ||
| 559 | if (type == SurfaceType::Texture) { | ||
| 560 | UNIMPLEMENTED(); | ||
| 561 | } else { | ||
| 562 | morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr, | ||
| 563 | load_start, load_end); | ||
| 564 | } | ||
| 565 | } | ||
| 566 | } | ||
| 567 | |||
| 568 | MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); | ||
| 569 | void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { | ||
| 570 | u8* const dst_buffer = Memory::GetPhysicalPointer(addr); | ||
| 571 | if (dst_buffer == nullptr) | ||
| 572 | return; | ||
| 573 | |||
| 574 | ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format)); | ||
| 575 | |||
| 576 | // TODO: Should probably be done in ::Memory:: and check for other regions too | ||
| 577 | // same as loadglbuffer() | ||
| 578 | if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END) | ||
| 579 | flush_end = Memory::VRAM_VADDR_END; | ||
| 580 | |||
| 581 | if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR) | ||
| 582 | flush_start = Memory::VRAM_VADDR; | ||
| 583 | |||
| 584 | MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); | ||
| 585 | |||
| 586 | ASSERT(flush_start >= addr && flush_end <= end); | ||
| 587 | const u64 start_offset = flush_start - addr; | ||
| 588 | const u64 end_offset = flush_end - addr; | ||
| 589 | |||
| 590 | if (type == SurfaceType::Fill) { | ||
| 591 | const u64 coarse_start_offset = start_offset - (start_offset % fill_size); | ||
| 592 | const u64 backup_bytes = start_offset % fill_size; | ||
| 593 | std::array<u8, 4> backup_data; | ||
| 594 | if (backup_bytes) | ||
| 595 | std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes); | ||
| 596 | |||
| 597 | for (u64 offset = coarse_start_offset; offset < end_offset; offset += fill_size) { | ||
| 598 | std::memcpy(&dst_buffer[offset], &fill_data[0], | ||
| 599 | std::min(fill_size, end_offset - offset)); | ||
| 600 | } | ||
| 601 | |||
| 602 | if (backup_bytes) | ||
| 603 | std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); | ||
| 604 | } else if (!is_tiled) { | ||
| 605 | ASSERT(type == SurfaceType::Color); | ||
| 606 | std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start); | ||
| 607 | } else { | ||
| 608 | gl_to_morton_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr, | ||
| 609 | flush_start, flush_end); | ||
| 610 | } | ||
| 611 | } | ||
| 612 | |||
| 613 | MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); | ||
| 614 | void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, | ||
| 615 | GLuint draw_fb_handle) { | ||
| 616 | if (type == SurfaceType::Fill) | ||
| 617 | return; | ||
| 618 | |||
| 619 | MICROPROFILE_SCOPE(OpenGL_TextureUL); | ||
| 620 | |||
| 621 | ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format)); | ||
| 622 | |||
| 623 | // Load data from memory to the surface | ||
| 624 | GLint x0 = static_cast<GLint>(rect.left); | ||
| 625 | GLint y0 = static_cast<GLint>(rect.bottom); | ||
| 626 | size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format); | ||
| 627 | |||
| 628 | const FormatTuple& tuple = GetFormatTuple(pixel_format); | ||
| 629 | GLuint target_tex = texture.handle; | ||
| 630 | |||
| 631 | // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in | ||
| 632 | // surface | ||
| 633 | OGLTexture unscaled_tex; | ||
| 634 | if (res_scale != 1) { | ||
| 635 | x0 = 0; | ||
| 636 | y0 = 0; | ||
| 637 | |||
| 638 | unscaled_tex.Create(); | ||
| 639 | AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); | ||
| 640 | target_tex = unscaled_tex.handle; | ||
| 641 | } | ||
| 642 | |||
| 643 | OpenGLState cur_state = OpenGLState::GetCurState(); | ||
| 644 | |||
| 645 | GLuint old_tex = cur_state.texture_units[0].texture_2d; | ||
| 646 | cur_state.texture_units[0].texture_2d = target_tex; | ||
| 647 | cur_state.Apply(); | ||
| 648 | |||
| 649 | // Ensure no bad interactions with GL_UNPACK_ALIGNMENT | ||
| 650 | ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); | ||
| 651 | glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(stride)); | ||
| 652 | |||
| 653 | glActiveTexture(GL_TEXTURE0); | ||
| 654 | glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()), | ||
| 655 | static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, | ||
| 656 | &gl_buffer[buffer_offset]); | ||
| 657 | |||
| 658 | glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); | ||
| 659 | |||
| 660 | cur_state.texture_units[0].texture_2d = old_tex; | ||
| 661 | cur_state.Apply(); | ||
| 662 | |||
| 663 | if (res_scale != 1) { | ||
| 664 | auto scaled_rect = rect; | ||
| 665 | scaled_rect.left *= res_scale; | ||
| 666 | scaled_rect.top *= res_scale; | ||
| 667 | scaled_rect.right *= res_scale; | ||
| 668 | scaled_rect.bottom *= res_scale; | ||
| 669 | |||
| 670 | BlitTextures(unscaled_tex.handle, {0, rect.GetHeight(), rect.GetWidth(), 0}, texture.handle, | ||
| 671 | scaled_rect, type, read_fb_handle, draw_fb_handle); | ||
| 672 | } | ||
| 673 | } | ||
| 674 | |||
| 675 | MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64)); | ||
| 676 | void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, | ||
| 677 | GLuint draw_fb_handle) { | ||
| 678 | if (type == SurfaceType::Fill) | ||
| 679 | return; | ||
| 680 | |||
| 681 | MICROPROFILE_SCOPE(OpenGL_TextureDL); | ||
| 682 | |||
| 683 | if (gl_buffer == nullptr) { | ||
| 684 | gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format); | ||
| 685 | gl_buffer.reset(new u8[gl_buffer_size]); | ||
| 686 | } | ||
| 687 | |||
| 688 | OpenGLState state = OpenGLState::GetCurState(); | ||
| 689 | OpenGLState prev_state = state; | ||
| 690 | SCOPE_EXIT({ prev_state.Apply(); }); | ||
| 691 | |||
| 692 | const FormatTuple& tuple = GetFormatTuple(pixel_format); | ||
| 693 | |||
| 694 | // Ensure no bad interactions with GL_PACK_ALIGNMENT | ||
| 695 | ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); | ||
| 696 | glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride)); | ||
| 697 | size_t buffer_offset = (rect.bottom * stride + rect.left) * GetGLBytesPerPixel(pixel_format); | ||
| 698 | |||
| 699 | // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush | ||
| 700 | if (res_scale != 1) { | ||
| 701 | auto scaled_rect = rect; | ||
| 702 | scaled_rect.left *= res_scale; | ||
| 703 | scaled_rect.top *= res_scale; | ||
| 704 | scaled_rect.right *= res_scale; | ||
| 705 | scaled_rect.bottom *= res_scale; | ||
| 706 | |||
| 707 | OGLTexture unscaled_tex; | ||
| 708 | unscaled_tex.Create(); | ||
| 709 | |||
| 710 | MathUtil::Rectangle<u32> unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0}; | ||
| 711 | AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); | ||
| 712 | BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type, | ||
| 713 | read_fb_handle, draw_fb_handle); | ||
| 714 | |||
| 715 | state.texture_units[0].texture_2d = unscaled_tex.handle; | ||
| 716 | state.Apply(); | ||
| 717 | |||
| 718 | glActiveTexture(GL_TEXTURE0); | ||
| 719 | glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]); | ||
| 720 | } else { | ||
| 721 | state.ResetTexture(texture.handle); | ||
| 722 | state.draw.read_framebuffer = read_fb_handle; | ||
| 723 | state.Apply(); | ||
| 724 | |||
| 725 | if (type == SurfaceType::Color || type == SurfaceType::Texture) { | ||
| 726 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, | ||
| 727 | texture.handle, 0); | ||
| 728 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||
| 729 | 0, 0); | ||
| 730 | } else if (type == SurfaceType::Depth) { | ||
| 731 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 732 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, | ||
| 733 | texture.handle, 0); | ||
| 734 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 735 | } else { | ||
| 736 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 737 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||
| 738 | texture.handle, 0); | ||
| 739 | } | ||
| 740 | glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom), | ||
| 741 | static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), | ||
| 742 | tuple.format, tuple.type, &gl_buffer[buffer_offset]); | ||
| 743 | } | ||
| 744 | |||
| 745 | glPixelStorei(GL_PACK_ROW_LENGTH, 0); | ||
| 746 | } | ||
| 747 | |||
/// Bit flags describing the kinds of surface match a lookup may accept. Flags can be combined
/// with operator|.
enum MatchFlags {
    /// Flag that can be applied to other match types, invalid matches require validation before
    /// they can be used
    Invalid = 1 << 0,
    /// Surfaces perfectly match
    Exact = 1 << 1,
    /// Surface encompasses params
    SubRect = 1 << 2,
    /// Surface we can copy from
    Copy = 1 << 3,
    /// Surface that can expand params
    Expand = 1 << 4,
    /// Surface that will match a display transfer "texture copy" parameters
    TexCopy = 1 << 5
};

/// Combines two flag sets into one; usable in constant expressions.
constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) {
    const int combined_bits = static_cast<int>(lhs) | static_cast<int>(rhs);
    return static_cast<MatchFlags>(combined_bits);
}
| 761 | |||
| 762 | /// Get the best surface match (and its match type) for the given flags | ||
| 763 | template <MatchFlags find_flags> | ||
| 764 | Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, | ||
| 765 | ScaleMatch match_scale_type, | ||
| 766 | boost::optional<SurfaceInterval> validate_interval = boost::none) { | ||
| 767 | Surface match_surface = nullptr; | ||
| 768 | bool match_valid = false; | ||
| 769 | u32 match_scale = 0; | ||
| 770 | SurfaceInterval match_interval{}; | ||
| 771 | |||
| 772 | for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { | ||
| 773 | for (auto& surface : pair.second) { | ||
| 774 | bool res_scale_matched = match_scale_type == ScaleMatch::Exact | ||
| 775 | ? (params.res_scale == surface->res_scale) | ||
| 776 | : (params.res_scale <= surface->res_scale); | ||
| 777 | // validity will be checked in GetCopyableInterval | ||
| 778 | bool is_valid = | ||
| 779 | find_flags & MatchFlags::Copy | ||
| 780 | ? true | ||
| 781 | : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); | ||
| 782 | |||
| 783 | if (!(find_flags & MatchFlags::Invalid) && !is_valid) | ||
| 784 | continue; | ||
| 785 | |||
| 786 | auto IsMatch_Helper = [&](auto check_type, auto match_fn) { | ||
| 787 | if (!(find_flags & check_type)) | ||
| 788 | return; | ||
| 789 | |||
| 790 | bool matched; | ||
| 791 | SurfaceInterval surface_interval; | ||
| 792 | std::tie(matched, surface_interval) = match_fn(); | ||
| 793 | if (!matched) | ||
| 794 | return; | ||
| 795 | |||
| 796 | if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && | ||
| 797 | surface->type != SurfaceType::Fill) | ||
| 798 | return; | ||
| 799 | |||
| 800 | // Found a match, update only if this is better than the previous one | ||
| 801 | auto UpdateMatch = [&] { | ||
| 802 | match_surface = surface; | ||
| 803 | match_valid = is_valid; | ||
| 804 | match_scale = surface->res_scale; | ||
| 805 | match_interval = surface_interval; | ||
| 806 | }; | ||
| 807 | |||
| 808 | if (surface->res_scale > match_scale) { | ||
| 809 | UpdateMatch(); | ||
| 810 | return; | ||
| 811 | } else if (surface->res_scale < match_scale) { | ||
| 812 | return; | ||
| 813 | } | ||
| 814 | |||
| 815 | if (is_valid && !match_valid) { | ||
| 816 | UpdateMatch(); | ||
| 817 | return; | ||
| 818 | } else if (is_valid != match_valid) { | ||
| 819 | return; | ||
| 820 | } | ||
| 821 | |||
| 822 | if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) { | ||
| 823 | UpdateMatch(); | ||
| 824 | } | ||
| 825 | }; | ||
| 826 | IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Exact>{}, [&] { | ||
| 827 | return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); | ||
| 828 | }); | ||
| 829 | IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::SubRect>{}, [&] { | ||
| 830 | return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); | ||
| 831 | }); | ||
| 832 | IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Copy>{}, [&] { | ||
| 833 | auto copy_interval = | ||
| 834 | params.FromInterval(*validate_interval).GetCopyableInterval(surface); | ||
| 835 | bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && | ||
| 836 | surface->CanCopy(params, copy_interval); | ||
| 837 | return std::make_pair(matched, copy_interval); | ||
| 838 | }); | ||
| 839 | IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Expand>{}, [&] { | ||
| 840 | return std::make_pair(surface->CanExpand(params), surface->GetInterval()); | ||
| 841 | }); | ||
| 842 | IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::TexCopy>{}, [&] { | ||
| 843 | return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); | ||
| 844 | }); | ||
| 845 | } | ||
| 846 | } | ||
| 847 | return match_surface; | ||
| 848 | } | ||
| 849 | |||
| 850 | RasterizerCacheOpenGL::RasterizerCacheOpenGL() { | ||
| 851 | read_framebuffer.Create(); | ||
| 852 | draw_framebuffer.Create(); | ||
| 853 | |||
| 854 | attributeless_vao.Create(); | ||
| 855 | |||
| 856 | d24s8_abgr_buffer.Create(); | ||
| 857 | d24s8_abgr_buffer_size = 0; | ||
| 858 | |||
| 859 | const char* vs_source = R"( | ||
| 860 | #version 330 core | ||
| 861 | const vec2 vertices[4] = vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); | ||
| 862 | void main() { | ||
| 863 | gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0); | ||
| 864 | } | ||
| 865 | )"; | ||
| 866 | const char* fs_source = R"( | ||
| 867 | #version 330 core | ||
| 868 | |||
| 869 | uniform samplerBuffer tbo; | ||
| 870 | uniform vec2 tbo_size; | ||
| 871 | uniform vec4 viewport; | ||
| 872 | |||
| 873 | out vec4 color; | ||
| 874 | |||
| 875 | void main() { | ||
| 876 | vec2 tbo_coord = (gl_FragCoord.xy - viewport.xy) * tbo_size / viewport.zw; | ||
| 877 | int tbo_offset = int(tbo_coord.y) * int(tbo_size.x) + int(tbo_coord.x); | ||
| 878 | color = texelFetch(tbo, tbo_offset).rabg; | ||
| 879 | } | ||
| 880 | )"; | ||
| 881 | d24s8_abgr_shader.Create(vs_source, nullptr, fs_source); | ||
| 882 | |||
| 883 | OpenGLState state = OpenGLState::GetCurState(); | ||
| 884 | GLuint old_program = state.draw.shader_program; | ||
| 885 | state.draw.shader_program = d24s8_abgr_shader.handle; | ||
| 886 | state.Apply(); | ||
| 887 | |||
| 888 | GLint tbo_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo"); | ||
| 889 | ASSERT(tbo_u_id != -1); | ||
| 890 | glUniform1i(tbo_u_id, 0); | ||
| 891 | |||
| 892 | state.draw.shader_program = old_program; | ||
| 893 | state.Apply(); | ||
| 894 | |||
| 895 | d24s8_abgr_tbo_size_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo_size"); | ||
| 896 | ASSERT(d24s8_abgr_tbo_size_u_id != -1); | ||
| 897 | d24s8_abgr_viewport_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "viewport"); | ||
| 898 | ASSERT(d24s8_abgr_viewport_u_id != -1); | ||
| 899 | } | ||
| 900 | |||
| 901 | RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { | ||
| 902 | FlushAll(); | ||
| 903 | while (!surface_cache.empty()) | ||
| 904 | UnregisterSurface(*surface_cache.begin()->second.begin()); | ||
| 905 | } | ||
| 906 | |||
| 907 | bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, | ||
| 908 | const MathUtil::Rectangle<u32>& src_rect, | ||
| 909 | const Surface& dst_surface, | ||
| 910 | const MathUtil::Rectangle<u32>& dst_rect) { | ||
| 911 | if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) | ||
| 912 | return false; | ||
| 913 | |||
| 914 | return BlitTextures(src_surface->texture.handle, src_rect, dst_surface->texture.handle, | ||
| 915 | dst_rect, src_surface->type, read_framebuffer.handle, | ||
| 916 | draw_framebuffer.handle); | ||
| 917 | } | ||
| 918 | |||
| 919 | void RasterizerCacheOpenGL::ConvertD24S8toABGR(GLuint src_tex, | ||
| 920 | const MathUtil::Rectangle<u32>& src_rect, | ||
| 921 | GLuint dst_tex, | ||
| 922 | const MathUtil::Rectangle<u32>& dst_rect) { | ||
| 923 | OpenGLState prev_state = OpenGLState::GetCurState(); | ||
| 924 | SCOPE_EXIT({ prev_state.Apply(); }); | ||
| 925 | |||
| 926 | OpenGLState state; | ||
| 927 | state.draw.read_framebuffer = read_framebuffer.handle; | ||
| 928 | state.draw.draw_framebuffer = draw_framebuffer.handle; | ||
| 929 | state.Apply(); | ||
| 930 | |||
| 931 | glBindBuffer(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer.handle); | ||
| 932 | |||
| 933 | GLsizeiptr target_pbo_size = src_rect.GetWidth() * src_rect.GetHeight() * 4; | ||
| 934 | if (target_pbo_size > d24s8_abgr_buffer_size) { | ||
| 935 | d24s8_abgr_buffer_size = target_pbo_size * 2; | ||
| 936 | glBufferData(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer_size, nullptr, GL_STREAM_COPY); | ||
| 937 | } | ||
| 938 | |||
| 939 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 940 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, | ||
| 941 | 0); | ||
| 942 | glReadPixels(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.bottom), | ||
| 943 | static_cast<GLsizei>(src_rect.GetWidth()), | ||
| 944 | static_cast<GLsizei>(src_rect.GetHeight()), GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, | ||
| 945 | 0); | ||
| 946 | |||
| 947 | glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); | ||
| 948 | |||
| 949 | // PBO now contains src_tex in RABG format | ||
| 950 | state.draw.shader_program = d24s8_abgr_shader.handle; | ||
| 951 | state.draw.vertex_array = attributeless_vao.handle; | ||
| 952 | state.viewport.x = static_cast<GLint>(dst_rect.left); | ||
| 953 | state.viewport.y = static_cast<GLint>(dst_rect.bottom); | ||
| 954 | state.viewport.width = static_cast<GLsizei>(dst_rect.GetWidth()); | ||
| 955 | state.viewport.height = static_cast<GLsizei>(dst_rect.GetHeight()); | ||
| 956 | state.Apply(); | ||
| 957 | |||
| 958 | OGLTexture tbo; | ||
| 959 | tbo.Create(); | ||
| 960 | glActiveTexture(GL_TEXTURE0); | ||
| 961 | glBindTexture(GL_TEXTURE_BUFFER, tbo.handle); | ||
| 962 | glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA8, d24s8_abgr_buffer.handle); | ||
| 963 | |||
| 964 | glUniform2f(d24s8_abgr_tbo_size_u_id, static_cast<GLfloat>(src_rect.GetWidth()), | ||
| 965 | static_cast<GLfloat>(src_rect.GetHeight())); | ||
| 966 | glUniform4f(d24s8_abgr_viewport_u_id, static_cast<GLfloat>(state.viewport.x), | ||
| 967 | static_cast<GLfloat>(state.viewport.y), static_cast<GLfloat>(state.viewport.width), | ||
| 968 | static_cast<GLfloat>(state.viewport.height)); | ||
| 969 | |||
| 970 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0); | ||
| 971 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 972 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | ||
| 973 | |||
| 974 | glBindTexture(GL_TEXTURE_BUFFER, 0); | ||
| 975 | } | ||
| 976 | |||
| 977 | Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, | ||
| 978 | bool load_if_create) { | ||
| 979 | if (params.addr == 0 || params.height * params.width == 0) { | ||
| 980 | return nullptr; | ||
| 981 | } | ||
| 982 | // Use GetSurfaceSubRect instead | ||
| 983 | ASSERT(params.width == params.stride); | ||
| 984 | |||
| 985 | ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0)); | ||
| 986 | |||
| 987 | // Check for an exact match in existing surfaces | ||
| 988 | Surface surface = | ||
| 989 | FindMatch<MatchFlags::Exact | MatchFlags::Invalid>(surface_cache, params, match_res_scale); | ||
| 990 | |||
| 991 | if (surface == nullptr) { | ||
| 992 | u16 target_res_scale = params.res_scale; | ||
| 993 | if (match_res_scale != ScaleMatch::Exact) { | ||
| 994 | // This surface may have a subrect of another surface with a higher res_scale, find it | ||
| 995 | // to adjust our params | ||
| 996 | SurfaceParams find_params = params; | ||
| 997 | Surface expandable = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>( | ||
| 998 | surface_cache, find_params, match_res_scale); | ||
| 999 | if (expandable != nullptr && expandable->res_scale > target_res_scale) { | ||
| 1000 | target_res_scale = expandable->res_scale; | ||
| 1001 | } | ||
| 1002 | // Keep res_scale when reinterpreting d24s8 -> rgba8 | ||
| 1003 | if (params.pixel_format == PixelFormat::RGBA8) { | ||
| 1004 | find_params.pixel_format = PixelFormat::D24S8; | ||
| 1005 | expandable = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>( | ||
| 1006 | surface_cache, find_params, match_res_scale); | ||
| 1007 | if (expandable != nullptr && expandable->res_scale > target_res_scale) { | ||
| 1008 | target_res_scale = expandable->res_scale; | ||
| 1009 | } | ||
| 1010 | } | ||
| 1011 | } | ||
| 1012 | SurfaceParams new_params = params; | ||
| 1013 | new_params.res_scale = target_res_scale; | ||
| 1014 | surface = CreateSurface(new_params); | ||
| 1015 | RegisterSurface(surface); | ||
| 1016 | } | ||
| 1017 | |||
| 1018 | if (load_if_create) { | ||
| 1019 | ValidateSurface(surface, params.addr, params.size); | ||
| 1020 | } | ||
| 1021 | |||
| 1022 | return surface; | ||
| 1023 | } | ||
| 1024 | |||
| 1025 | SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, | ||
| 1026 | ScaleMatch match_res_scale, | ||
| 1027 | bool load_if_create) { | ||
| 1028 | if (params.addr == 0 || params.height * params.width == 0) { | ||
| 1029 | return std::make_tuple(nullptr, MathUtil::Rectangle<u32>{}); | ||
| 1030 | } | ||
| 1031 | |||
| 1032 | // Attempt to find encompassing surface | ||
| 1033 | Surface surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params, | ||
| 1034 | match_res_scale); | ||
| 1035 | |||
| 1036 | // Check if FindMatch failed because of res scaling | ||
| 1037 | // If that's the case create a new surface with | ||
| 1038 | // the dimensions of the lower res_scale surface | ||
| 1039 | // to suggest it should not be used again | ||
| 1040 | if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) { | ||
| 1041 | surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params, | ||
| 1042 | ScaleMatch::Ignore); | ||
| 1043 | if (surface != nullptr) { | ||
| 1044 | ASSERT(surface->res_scale < params.res_scale); | ||
| 1045 | SurfaceParams new_params = *surface; | ||
| 1046 | new_params.res_scale = params.res_scale; | ||
| 1047 | |||
| 1048 | surface = CreateSurface(new_params); | ||
| 1049 | RegisterSurface(surface); | ||
| 1050 | } | ||
| 1051 | } | ||
| 1052 | |||
| 1053 | SurfaceParams aligned_params = params; | ||
| 1054 | if (params.is_tiled) { | ||
| 1055 | aligned_params.height = Common::AlignUp(params.height, 8); | ||
| 1056 | aligned_params.width = Common::AlignUp(params.width, 8); | ||
| 1057 | aligned_params.stride = Common::AlignUp(params.stride, 8); | ||
| 1058 | aligned_params.UpdateParams(); | ||
| 1059 | } | ||
| 1060 | |||
| 1061 | // Check for a surface we can expand before creating a new one | ||
| 1062 | if (surface == nullptr) { | ||
| 1063 | surface = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(surface_cache, aligned_params, | ||
| 1064 | match_res_scale); | ||
| 1065 | if (surface != nullptr) { | ||
| 1066 | aligned_params.width = aligned_params.stride; | ||
| 1067 | aligned_params.UpdateParams(); | ||
| 1068 | |||
| 1069 | SurfaceParams new_params = *surface; | ||
| 1070 | new_params.addr = std::min(aligned_params.addr, surface->addr); | ||
| 1071 | new_params.end = std::max(aligned_params.end, surface->end); | ||
| 1072 | new_params.size = new_params.end - new_params.addr; | ||
| 1073 | new_params.height = static_cast<u32>( | ||
| 1074 | new_params.size / aligned_params.BytesInPixels(aligned_params.stride)); | ||
| 1075 | ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0); | ||
| 1076 | |||
| 1077 | Surface new_surface = CreateSurface(new_params); | ||
| 1078 | DuplicateSurface(surface, new_surface); | ||
| 1079 | |||
| 1080 | // Delete the expanded surface, this can't be done safely yet | ||
| 1081 | // because it may still be in use | ||
| 1082 | remove_surfaces.emplace(surface); | ||
| 1083 | |||
| 1084 | surface = new_surface; | ||
| 1085 | RegisterSurface(new_surface); | ||
| 1086 | } | ||
| 1087 | } | ||
| 1088 | |||
| 1089 | // No subrect found - create and return a new surface | ||
| 1090 | if (surface == nullptr) { | ||
| 1091 | SurfaceParams new_params = aligned_params; | ||
| 1092 | // Can't have gaps in a surface | ||
| 1093 | new_params.width = aligned_params.stride; | ||
| 1094 | new_params.UpdateParams(); | ||
| 1095 | // GetSurface will create the new surface and possibly adjust res_scale if necessary | ||
| 1096 | surface = GetSurface(new_params, match_res_scale, load_if_create); | ||
| 1097 | } else if (load_if_create) { | ||
| 1098 | ValidateSurface(surface, aligned_params.addr, aligned_params.size); | ||
| 1099 | } | ||
| 1100 | |||
| 1101 | return std::make_tuple(surface, surface->GetScaledSubRect(params)); | ||
| 1102 | } | ||
| 1103 | |||
| 1104 | Surface RasterizerCacheOpenGL::GetTextureSurface(const void* config) { | ||
| 1105 | UNIMPLEMENTED(); | ||
| 1106 | return {}; | ||
| 1107 | } | ||
| 1108 | |||
| 1109 | SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( | ||
| 1110 | bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport_rect) { | ||
| 1111 | UNIMPLEMENTED(); | ||
| 1112 | return {}; | ||
| 1113 | } | ||
| 1114 | |||
| 1115 | Surface RasterizerCacheOpenGL::GetFillSurface(const void* config) { | ||
| 1116 | UNIMPLEMENTED(); | ||
| 1117 | return {}; | ||
| 1118 | } | ||
| 1119 | |||
| 1120 | SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) { | ||
| 1121 | MathUtil::Rectangle<u32> rect{}; | ||
| 1122 | |||
| 1123 | Surface match_surface = FindMatch<MatchFlags::TexCopy | MatchFlags::Invalid>( | ||
| 1124 | surface_cache, params, ScaleMatch::Ignore); | ||
| 1125 | |||
| 1126 | if (match_surface != nullptr) { | ||
| 1127 | ValidateSurface(match_surface, params.addr, params.size); | ||
| 1128 | |||
| 1129 | SurfaceParams match_subrect; | ||
| 1130 | if (params.width != params.stride) { | ||
| 1131 | const u32 tiled_size = match_surface->is_tiled ? 8 : 1; | ||
| 1132 | match_subrect = params; | ||
| 1133 | match_subrect.width = | ||
| 1134 | static_cast<u32>(match_surface->PixelsInBytes(params.width) / tiled_size); | ||
| 1135 | match_subrect.stride = | ||
| 1136 | static_cast<u32>(match_surface->PixelsInBytes(params.stride) / tiled_size); | ||
| 1137 | match_subrect.height *= tiled_size; | ||
| 1138 | } else { | ||
| 1139 | match_subrect = match_surface->FromInterval(params.GetInterval()); | ||
| 1140 | ASSERT(match_subrect.GetInterval() == params.GetInterval()); | ||
| 1141 | } | ||
| 1142 | |||
| 1143 | rect = match_surface->GetScaledSubRect(match_subrect); | ||
| 1144 | } | ||
| 1145 | |||
| 1146 | return std::make_tuple(match_surface, rect); | ||
| 1147 | } | ||
| 1148 | |||
| 1149 | void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, | ||
| 1150 | const Surface& dest_surface) { | ||
| 1151 | ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end); | ||
| 1152 | |||
| 1153 | BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface, | ||
| 1154 | dest_surface->GetScaledSubRect(*src_surface)); | ||
| 1155 | |||
| 1156 | dest_surface->invalid_regions -= src_surface->GetInterval(); | ||
| 1157 | dest_surface->invalid_regions += src_surface->invalid_regions; | ||
| 1158 | |||
| 1159 | SurfaceRegions regions; | ||
| 1160 | for (auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) { | ||
| 1161 | if (pair.second == src_surface) { | ||
| 1162 | regions += pair.first; | ||
| 1163 | } | ||
| 1164 | } | ||
| 1165 | for (auto& interval : regions) { | ||
| 1166 | dirty_regions.set({interval, dest_surface}); | ||
| 1167 | } | ||
| 1168 | } | ||
| 1169 | |||
| 1170 | void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, u64 size) { | ||
| 1171 | if (size == 0) | ||
| 1172 | return; | ||
| 1173 | |||
| 1174 | const SurfaceInterval validate_interval(addr, addr + size); | ||
| 1175 | |||
| 1176 | if (surface->type == SurfaceType::Fill) { | ||
| 1177 | // Sanity check, fill surfaces will always be valid when used | ||
| 1178 | ASSERT(surface->IsRegionValid(validate_interval)); | ||
| 1179 | return; | ||
| 1180 | } | ||
| 1181 | |||
| 1182 | while (true) { | ||
| 1183 | const auto it = surface->invalid_regions.find(validate_interval); | ||
| 1184 | if (it == surface->invalid_regions.end()) | ||
| 1185 | break; | ||
| 1186 | |||
| 1187 | const auto interval = *it & validate_interval; | ||
| 1188 | // Look for a valid surface to copy from | ||
| 1189 | SurfaceParams params = surface->FromInterval(interval); | ||
| 1190 | |||
| 1191 | Surface copy_surface = | ||
| 1192 | FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval); | ||
| 1193 | if (copy_surface != nullptr) { | ||
| 1194 | SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); | ||
| 1195 | CopySurface(copy_surface, surface, copy_interval); | ||
| 1196 | surface->invalid_regions.erase(copy_interval); | ||
| 1197 | continue; | ||
| 1198 | } | ||
| 1199 | |||
| 1200 | // D24S8 to RGBA8 | ||
| 1201 | if (surface->pixel_format == PixelFormat::RGBA8) { | ||
| 1202 | params.pixel_format = PixelFormat::D24S8; | ||
| 1203 | Surface reinterpret_surface = | ||
| 1204 | FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval); | ||
| 1205 | if (reinterpret_surface != nullptr) { | ||
| 1206 | ASSERT(reinterpret_surface->pixel_format == PixelFormat::D24S8); | ||
| 1207 | |||
| 1208 | SurfaceInterval convert_interval = params.GetCopyableInterval(reinterpret_surface); | ||
| 1209 | SurfaceParams convert_params = surface->FromInterval(convert_interval); | ||
| 1210 | auto src_rect = reinterpret_surface->GetScaledSubRect(convert_params); | ||
| 1211 | auto dest_rect = surface->GetScaledSubRect(convert_params); | ||
| 1212 | |||
| 1213 | ConvertD24S8toABGR(reinterpret_surface->texture.handle, src_rect, | ||
| 1214 | surface->texture.handle, dest_rect); | ||
| 1215 | |||
| 1216 | surface->invalid_regions.erase(convert_interval); | ||
| 1217 | continue; | ||
| 1218 | } | ||
| 1219 | } | ||
| 1220 | |||
| 1221 | // Load data from 3DS memory | ||
| 1222 | FlushRegion(params.addr, params.size); | ||
| 1223 | surface->LoadGLBuffer(params.addr, params.end); | ||
| 1224 | surface->UploadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, | ||
| 1225 | draw_framebuffer.handle); | ||
| 1226 | surface->invalid_regions.erase(params.GetInterval()); | ||
| 1227 | } | ||
| 1228 | } | ||
| 1229 | |||
| 1230 | void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u64 size, Surface flush_surface) { | ||
| 1231 | if (size == 0) | ||
| 1232 | return; | ||
| 1233 | |||
| 1234 | const SurfaceInterval flush_interval(addr, addr + size); | ||
| 1235 | SurfaceRegions flushed_intervals; | ||
| 1236 | |||
| 1237 | for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { | ||
| 1238 | // small sizes imply that this most likely comes from the cpu, flush the entire region | ||
| 1239 | // the point is to avoid thousands of small writes every frame if the cpu decides to access | ||
| 1240 | // that region, anything higher than 8 you're guaranteed it comes from a service | ||
| 1241 | const auto interval = size <= 8 ? pair.first : pair.first & flush_interval; | ||
| 1242 | auto& surface = pair.second; | ||
| 1243 | |||
| 1244 | if (flush_surface != nullptr && surface != flush_surface) | ||
| 1245 | continue; | ||
| 1246 | |||
| 1247 | // Sanity check, this surface is the last one that marked this region dirty | ||
| 1248 | ASSERT(surface->IsRegionValid(interval)); | ||
| 1249 | |||
| 1250 | if (surface->type != SurfaceType::Fill) { | ||
| 1251 | SurfaceParams params = surface->FromInterval(interval); | ||
| 1252 | surface->DownloadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, | ||
| 1253 | draw_framebuffer.handle); | ||
| 1254 | } | ||
| 1255 | surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval)); | ||
| 1256 | flushed_intervals += interval; | ||
| 1257 | } | ||
| 1258 | // Reset dirty regions | ||
| 1259 | dirty_regions -= flushed_intervals; | ||
| 1260 | } | ||
| 1261 | |||
| 1262 | void RasterizerCacheOpenGL::FlushAll() { | ||
| 1263 | FlushRegion(0, 0xFFFFFFFF); | ||
| 1264 | } | ||
| 1265 | |||
| 1266 | void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u64 size, const Surface& region_owner) { | ||
| 1267 | if (size == 0) | ||
| 1268 | return; | ||
| 1269 | |||
| 1270 | const SurfaceInterval invalid_interval(addr, addr + size); | ||
| 1271 | |||
| 1272 | if (region_owner != nullptr) { | ||
| 1273 | ASSERT(region_owner->type != SurfaceType::Texture); | ||
| 1274 | ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); | ||
| 1275 | // Surfaces can't have a gap | ||
| 1276 | ASSERT(region_owner->width == region_owner->stride); | ||
| 1277 | region_owner->invalid_regions.erase(invalid_interval); | ||
| 1278 | } | ||
| 1279 | |||
| 1280 | for (auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { | ||
| 1281 | for (auto& cached_surface : pair.second) { | ||
| 1282 | if (cached_surface == region_owner) | ||
| 1283 | continue; | ||
| 1284 | |||
| 1285 | // If cpu is invalidating this region we want to remove it | ||
| 1286 | // to (likely) mark the memory pages as uncached | ||
| 1287 | if (region_owner == nullptr && size <= 8) { | ||
| 1288 | FlushRegion(cached_surface->addr, cached_surface->size, cached_surface); | ||
| 1289 | remove_surfaces.emplace(cached_surface); | ||
| 1290 | continue; | ||
| 1291 | } | ||
| 1292 | |||
| 1293 | const auto interval = cached_surface->GetInterval() & invalid_interval; | ||
| 1294 | cached_surface->invalid_regions.insert(interval); | ||
| 1295 | |||
| 1296 | // Remove only "empty" fill surfaces to avoid destroying and recreating OGL textures | ||
| 1297 | if (cached_surface->type == SurfaceType::Fill && | ||
| 1298 | cached_surface->IsSurfaceFullyInvalid()) { | ||
| 1299 | remove_surfaces.emplace(cached_surface); | ||
| 1300 | } | ||
| 1301 | } | ||
| 1302 | } | ||
| 1303 | |||
| 1304 | if (region_owner != nullptr) | ||
| 1305 | dirty_regions.set({invalid_interval, region_owner}); | ||
| 1306 | else | ||
| 1307 | dirty_regions.erase(invalid_interval); | ||
| 1308 | |||
| 1309 | for (auto& remove_surface : remove_surfaces) { | ||
| 1310 | if (remove_surface == region_owner) { | ||
| 1311 | Surface expanded_surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>( | ||
| 1312 | surface_cache, *region_owner, ScaleMatch::Ignore); | ||
| 1313 | ASSERT(expanded_surface); | ||
| 1314 | |||
| 1315 | if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) { | ||
| 1316 | DuplicateSurface(region_owner, expanded_surface); | ||
| 1317 | } else { | ||
| 1318 | continue; | ||
| 1319 | } | ||
| 1320 | } | ||
| 1321 | UnregisterSurface(remove_surface); | ||
| 1322 | } | ||
| 1323 | |||
| 1324 | remove_surfaces.clear(); | ||
| 1325 | } | ||
| 1326 | |||
| 1327 | Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { | ||
| 1328 | Surface surface = std::make_shared<CachedSurface>(); | ||
| 1329 | static_cast<SurfaceParams&>(*surface) = params; | ||
| 1330 | |||
| 1331 | surface->texture.Create(); | ||
| 1332 | |||
| 1333 | surface->gl_buffer_size = 0; | ||
| 1334 | surface->invalid_regions.insert(surface->GetInterval()); | ||
| 1335 | AllocateSurfaceTexture(surface->texture.handle, GetFormatTuple(surface->pixel_format), | ||
| 1336 | surface->GetScaledWidth(), surface->GetScaledHeight()); | ||
| 1337 | |||
| 1338 | return surface; | ||
| 1339 | } | ||
| 1340 | |||
| 1341 | void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { | ||
| 1342 | if (surface->registered) { | ||
| 1343 | return; | ||
| 1344 | } | ||
| 1345 | surface->registered = true; | ||
| 1346 | surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); | ||
| 1347 | UpdatePagesCachedCount(surface->addr, surface->size, 1); | ||
| 1348 | } | ||
| 1349 | |||
| 1350 | void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { | ||
| 1351 | if (!surface->registered) { | ||
| 1352 | return; | ||
| 1353 | } | ||
| 1354 | surface->registered = false; | ||
| 1355 | UpdatePagesCachedCount(surface->addr, surface->size, -1); | ||
| 1356 | surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); | ||
| 1357 | } | ||
| 1358 | |||
| 1359 | void RasterizerCacheOpenGL::UpdatePagesCachedCount(PAddr addr, u64 size, int delta) { | ||
| 1360 | UNIMPLEMENTED(); | ||
| 1361 | } | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h new file mode 100644 index 000000000..17ce0fee7 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -0,0 +1,350 @@ | |||
| 1 | // Copyright 2015 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <memory> | ||
| 9 | #include <set> | ||
| 10 | #include <tuple> | ||
| 11 | #ifdef __GNUC__ | ||
| 12 | #pragma GCC diagnostic push | ||
| 13 | #pragma GCC diagnostic ignored "-Wunused-local-typedefs" | ||
| 14 | #endif | ||
| 15 | #include <boost/icl/interval_map.hpp> | ||
| 16 | #include <boost/icl/interval_set.hpp> | ||
| 17 | #ifdef __GNUC__ | ||
| 18 | #pragma GCC diagnostic pop | ||
| 19 | #endif | ||
| 20 | #include <glad/glad.h> | ||
| 21 | #include "common/assert.h" | ||
| 22 | #include "common/common_funcs.h" | ||
| 23 | #include "common/common_types.h" | ||
| 24 | #include "common/math_util.h" | ||
| 25 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 26 | |||
| 27 | struct CachedSurface; | ||
| 28 | using Surface = std::shared_ptr<CachedSurface>; | ||
| 29 | using SurfaceSet = std::set<Surface>; | ||
| 30 | |||
| 31 | using SurfaceRegions = boost::icl::interval_set<PAddr>; | ||
| 32 | using SurfaceMap = boost::icl::interval_map<PAddr, Surface>; | ||
| 33 | using SurfaceCache = boost::icl::interval_map<PAddr, SurfaceSet>; | ||
| 34 | |||
| 35 | using SurfaceInterval = SurfaceCache::interval_type; | ||
| 36 | static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() && | ||
| 37 | std::is_same<SurfaceMap::interval_type, SurfaceCache::interval_type>(), | ||
| 38 | "incorrect interval types"); | ||
| 39 | |||
| 40 | using SurfaceRect_Tuple = std::tuple<Surface, MathUtil::Rectangle<u32>>; | ||
| 41 | using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; | ||
| 42 | |||
| 43 | using PageMap = boost::icl::interval_map<u32, int>; | ||
| 44 | |||
/// How the res_scale of a cached surface is compared against the requested one during lookup.
enum class ScaleMatch {
    /// Only accept the same res scale
    Exact,
    /// Only allow a higher scale than params
    Upscale,
    /// Accept every scaled res
    Ignore
};
| 50 | |||
| 51 | struct SurfaceParams { | ||
| 52 | enum class PixelFormat { | ||
| 53 | // First 5 formats are shared between textures and color buffers | ||
| 54 | RGBA8 = 0, | ||
| 55 | RGB8 = 1, | ||
| 56 | RGB5A1 = 2, | ||
| 57 | RGB565 = 3, | ||
| 58 | RGBA4 = 4, | ||
| 59 | |||
| 60 | // Texture-only formats | ||
| 61 | IA8 = 5, | ||
| 62 | RG8 = 6, | ||
| 63 | I8 = 7, | ||
| 64 | A8 = 8, | ||
| 65 | IA4 = 9, | ||
| 66 | I4 = 10, | ||
| 67 | A4 = 11, | ||
| 68 | ETC1 = 12, | ||
| 69 | ETC1A4 = 13, | ||
| 70 | |||
| 71 | // Depth buffer-only formats | ||
| 72 | D16 = 14, | ||
| 73 | // gap | ||
| 74 | D24 = 16, | ||
| 75 | D24S8 = 17, | ||
| 76 | |||
| 77 | Invalid = 255, | ||
| 78 | }; | ||
| 79 | |||
| 80 | enum class SurfaceType { | ||
| 81 | Color = 0, | ||
| 82 | Texture = 1, | ||
| 83 | Depth = 2, | ||
| 84 | DepthStencil = 3, | ||
| 85 | Fill = 4, | ||
| 86 | Invalid = 5 | ||
| 87 | }; | ||
| 88 | |||
| 89 | static constexpr unsigned int GetFormatBpp(PixelFormat format) { | ||
| 90 | constexpr std::array<unsigned int, 18> bpp_table = { | ||
| 91 | 32, // RGBA8 | ||
| 92 | 24, // RGB8 | ||
| 93 | 16, // RGB5A1 | ||
| 94 | 16, // RGB565 | ||
| 95 | 16, // RGBA4 | ||
| 96 | 16, // IA8 | ||
| 97 | 16, // RG8 | ||
| 98 | 8, // I8 | ||
| 99 | 8, // A8 | ||
| 100 | 8, // IA4 | ||
| 101 | 4, // I4 | ||
| 102 | 4, // A4 | ||
| 103 | 4, // ETC1 | ||
| 104 | 8, // ETC1A4 | ||
| 105 | 16, // D16 | ||
| 106 | 0, | ||
| 107 | 24, // D24 | ||
| 108 | 32, // D24S8 | ||
| 109 | }; | ||
| 110 | |||
| 111 | assert(static_cast<size_t>(format) < bpp_table.size()); | ||
| 112 | return bpp_table[static_cast<size_t>(format)]; | ||
| 113 | } | ||
| 114 | unsigned int GetFormatBpp() const { | ||
| 115 | return GetFormatBpp(pixel_format); | ||
| 116 | } | ||
| 117 | |||
| 118 | static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) { | ||
| 119 | SurfaceType a_type = GetFormatType(pixel_format_a); | ||
| 120 | SurfaceType b_type = GetFormatType(pixel_format_b); | ||
| 121 | |||
| 122 | if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && | ||
| 123 | (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) { | ||
| 124 | return true; | ||
| 125 | } | ||
| 126 | |||
| 127 | if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) { | ||
| 128 | return true; | ||
| 129 | } | ||
| 130 | |||
| 131 | if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) { | ||
| 132 | return true; | ||
| 133 | } | ||
| 134 | |||
| 135 | return false; | ||
| 136 | } | ||
| 137 | |||
| 138 | static constexpr SurfaceType GetFormatType(PixelFormat pixel_format) { | ||
| 139 | if ((unsigned int)pixel_format < 5) { | ||
| 140 | return SurfaceType::Color; | ||
| 141 | } | ||
| 142 | |||
| 143 | if ((unsigned int)pixel_format < 14) { | ||
| 144 | return SurfaceType::Texture; | ||
| 145 | } | ||
| 146 | |||
| 147 | if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) { | ||
| 148 | return SurfaceType::Depth; | ||
| 149 | } | ||
| 150 | |||
| 151 | if (pixel_format == PixelFormat::D24S8) { | ||
| 152 | return SurfaceType::DepthStencil; | ||
| 153 | } | ||
| 154 | |||
| 155 | return SurfaceType::Invalid; | ||
| 156 | } | ||
| 157 | |||
| 158 | /// Update the params "size", "end" and "type" from the already set "addr", "width", "height" | ||
| 159 | /// and "pixel_format" | ||
| 160 | void UpdateParams() { | ||
| 161 | if (stride == 0) { | ||
| 162 | stride = width; | ||
| 163 | } | ||
| 164 | type = GetFormatType(pixel_format); | ||
| 165 | size = !is_tiled ? BytesInPixels(stride * (height - 1) + width) | ||
| 166 | : BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8); | ||
| 167 | end = addr + size; | ||
| 168 | } | ||
| 169 | |||
| 170 | SurfaceInterval GetInterval() const { | ||
| 171 | return SurfaceInterval::right_open(addr, end); | ||
| 172 | } | ||
| 173 | |||
| 174 | // Returns the outer rectangle containing "interval" | ||
| 175 | SurfaceParams FromInterval(SurfaceInterval interval) const; | ||
| 176 | |||
| 177 | SurfaceInterval GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const; | ||
| 178 | |||
| 179 | // Returns the region of the biggest valid rectange within interval | ||
| 180 | SurfaceInterval GetCopyableInterval(const Surface& src_surface) const; | ||
| 181 | |||
| 182 | u32 GetScaledWidth() const { | ||
| 183 | return width * res_scale; | ||
| 184 | } | ||
| 185 | |||
| 186 | u32 GetScaledHeight() const { | ||
| 187 | return height * res_scale; | ||
| 188 | } | ||
| 189 | |||
| 190 | MathUtil::Rectangle<u32> GetRect() const { | ||
| 191 | return {0, height, width, 0}; | ||
| 192 | } | ||
| 193 | |||
| 194 | MathUtil::Rectangle<u32> GetScaledRect() const { | ||
| 195 | return {0, GetScaledHeight(), GetScaledWidth(), 0}; | ||
| 196 | } | ||
| 197 | |||
| 198 | u64 PixelsInBytes(u64 size) const { | ||
| 199 | return size * CHAR_BIT / GetFormatBpp(pixel_format); | ||
| 200 | } | ||
| 201 | |||
| 202 | u64 BytesInPixels(u64 pixels) const { | ||
| 203 | return pixels * GetFormatBpp(pixel_format) / CHAR_BIT; | ||
| 204 | } | ||
| 205 | |||
| 206 | bool ExactMatch(const SurfaceParams& other_surface) const; | ||
| 207 | bool CanSubRect(const SurfaceParams& sub_surface) const; | ||
| 208 | bool CanExpand(const SurfaceParams& expanded_surface) const; | ||
| 209 | bool CanTexCopy(const SurfaceParams& texcopy_params) const; | ||
| 210 | |||
| 211 | MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const; | ||
| 212 | MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const; | ||
| 213 | |||
| 214 | PAddr addr = 0; | ||
| 215 | PAddr end = 0; | ||
| 216 | u64 size = 0; | ||
| 217 | |||
| 218 | u32 width = 0; | ||
| 219 | u32 height = 0; | ||
| 220 | u32 stride = 0; | ||
| 221 | u16 res_scale = 1; | ||
| 222 | |||
| 223 | bool is_tiled = false; | ||
| 224 | PixelFormat pixel_format = PixelFormat::Invalid; | ||
| 225 | SurfaceType type = SurfaceType::Invalid; | ||
| 226 | }; | ||
| 227 | |||
| 228 | struct CachedSurface : SurfaceParams { | ||
| 229 | bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; | ||
| 230 | bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; | ||
| 231 | |||
| 232 | bool IsRegionValid(SurfaceInterval interval) const { | ||
| 233 | return (invalid_regions.find(interval) == invalid_regions.end()); | ||
| 234 | } | ||
| 235 | |||
| 236 | bool IsSurfaceFullyInvalid() const { | ||
| 237 | return (invalid_regions & GetInterval()) == SurfaceRegions(GetInterval()); | ||
| 238 | } | ||
| 239 | |||
| 240 | bool registered = false; | ||
| 241 | SurfaceRegions invalid_regions; | ||
| 242 | |||
| 243 | u64 fill_size = 0; /// Number of bytes to read from fill_data | ||
| 244 | std::array<u8, 4> fill_data; | ||
| 245 | |||
| 246 | OGLTexture texture; | ||
| 247 | |||
| 248 | static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) { | ||
| 249 | // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type | ||
| 250 | return format == PixelFormat::Invalid | ||
| 251 | ? 0 | ||
| 252 | : (format == PixelFormat::D24 || GetFormatType(format) == SurfaceType::Texture) | ||
| 253 | ? 4 | ||
| 254 | : SurfaceParams::GetFormatBpp(format) / 8; | ||
| 255 | } | ||
| 256 | |||
| 257 | std::unique_ptr<u8[]> gl_buffer; | ||
| 258 | size_t gl_buffer_size = 0; | ||
| 259 | |||
| 260 | // Read/Write data in 3DS memory to/from gl_buffer | ||
| 261 | void LoadGLBuffer(PAddr load_start, PAddr load_end); | ||
| 262 | void FlushGLBuffer(PAddr flush_start, PAddr flush_end); | ||
| 263 | |||
| 264 | // Upload/Download data in gl_buffer in/to this surface's texture | ||
| 265 | void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, | ||
| 266 | GLuint draw_fb_handle); | ||
| 267 | void DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, | ||
| 268 | GLuint draw_fb_handle); | ||
| 269 | }; | ||
| 270 | |||
| 271 | class RasterizerCacheOpenGL : NonCopyable { | ||
| 272 | public: | ||
| 273 | RasterizerCacheOpenGL(); | ||
| 274 | ~RasterizerCacheOpenGL(); | ||
| 275 | |||
| 276 | /// Blit one surface's texture to another | ||
| 277 | bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle<u32>& src_rect, | ||
| 278 | const Surface& dst_surface, const MathUtil::Rectangle<u32>& dst_rect); | ||
| 279 | |||
| 280 | void ConvertD24S8toABGR(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, | ||
| 281 | GLuint dst_tex, const MathUtil::Rectangle<u32>& dst_rect); | ||
| 282 | |||
| 283 | /// Copy one surface's region to another | ||
| 284 | void CopySurface(const Surface& src_surface, const Surface& dst_surface, | ||
| 285 | SurfaceInterval copy_interval); | ||
| 286 | |||
| 287 | /// Load a texture from 3DS memory to OpenGL and cache it (if not already cached) | ||
| 288 | Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, | ||
| 289 | bool load_if_create); | ||
| 290 | |||
| 291 | /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from | ||
| 292 | /// 3DS memory to OpenGL and caches it (if not already cached) | ||
| 293 | SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, | ||
| 294 | bool load_if_create); | ||
| 295 | |||
| 296 | /// Get a surface based on the texture configuration | ||
| 297 | Surface GetTextureSurface(const void* config); | ||
| 298 | |||
| 299 | /// Get the color and depth surfaces based on the framebuffer configuration | ||
| 300 | SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, | ||
| 301 | const MathUtil::Rectangle<s32>& viewport_rect); | ||
| 302 | |||
| 303 | /// Get a surface that matches the fill config | ||
| 304 | Surface GetFillSurface(const void* config); | ||
| 305 | |||
| 306 | /// Get a surface that matches a "texture copy" display transfer config | ||
| 307 | SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); | ||
| 308 | |||
| 309 | /// Write any cached resources overlapping the region back to memory (if dirty) | ||
| 310 | void FlushRegion(PAddr addr, u64 size, Surface flush_surface = nullptr); | ||
| 311 | |||
| 312 | /// Mark region as being invalidated by region_owner (nullptr if 3DS memory) | ||
| 313 | void InvalidateRegion(PAddr addr, u64 size, const Surface& region_owner); | ||
| 314 | |||
| 315 | /// Flush all cached resources tracked by this cache manager | ||
| 316 | void FlushAll(); | ||
| 317 | |||
| 318 | private: | ||
| 319 | void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); | ||
| 320 | |||
| 321 | /// Update surface's texture for given region when necessary | ||
| 322 | void ValidateSurface(const Surface& surface, PAddr addr, u64 size); | ||
| 323 | |||
| 324 | /// Create a new surface | ||
| 325 | Surface CreateSurface(const SurfaceParams& params); | ||
| 326 | |||
| 327 | /// Register surface into the cache | ||
| 328 | void RegisterSurface(const Surface& surface); | ||
| 329 | |||
| 330 | /// Remove surface from the cache | ||
| 331 | void UnregisterSurface(const Surface& surface); | ||
| 332 | |||
| 333 | /// Increase/decrease the number of surface in pages touching the specified region | ||
| 334 | void UpdatePagesCachedCount(PAddr addr, u64 size, int delta); | ||
| 335 | |||
| 336 | SurfaceCache surface_cache; | ||
| 337 | PageMap cached_pages; | ||
| 338 | SurfaceMap dirty_regions; | ||
| 339 | SurfaceSet remove_surfaces; | ||
| 340 | |||
| 341 | OGLFramebuffer read_framebuffer; | ||
| 342 | OGLFramebuffer draw_framebuffer; | ||
| 343 | |||
| 344 | OGLVertexArray attributeless_vao; | ||
| 345 | OGLBuffer d24s8_abgr_buffer; | ||
| 346 | GLsizeiptr d24s8_abgr_buffer_size; | ||
| 347 | OGLShader d24s8_abgr_shader; | ||
| 348 | GLint d24s8_abgr_tbo_size_u_id; | ||
| 349 | GLint d24s8_abgr_viewport_u_id; | ||
| 350 | }; | ||
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 13301ec9f..7da5e74d1 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h | |||
| @@ -36,7 +36,7 @@ public: | |||
| 36 | if (handle == 0) | 36 | if (handle == 0) |
| 37 | return; | 37 | return; |
| 38 | glDeleteTextures(1, &handle); | 38 | glDeleteTextures(1, &handle); |
| 39 | OpenGLState::ResetTexture(handle); | 39 | OpenGLState::GetCurState().ResetTexture(handle).Apply(); |
| 40 | handle = 0; | 40 | handle = 0; |
| 41 | } | 41 | } |
| 42 | 42 | ||
| @@ -69,7 +69,7 @@ public: | |||
| 69 | if (handle == 0) | 69 | if (handle == 0) |
| 70 | return; | 70 | return; |
| 71 | glDeleteSamplers(1, &handle); | 71 | glDeleteSamplers(1, &handle); |
| 72 | OpenGLState::ResetSampler(handle); | 72 | OpenGLState::GetCurState().ResetSampler(handle).Apply(); |
| 73 | handle = 0; | 73 | handle = 0; |
| 74 | } | 74 | } |
| 75 | 75 | ||
| @@ -91,10 +91,13 @@ public: | |||
| 91 | } | 91 | } |
| 92 | 92 | ||
| 93 | /// Creates a new internal OpenGL resource and stores the handle | 93 | /// Creates a new internal OpenGL resource and stores the handle |
| 94 | void Create(const char* vert_shader, const char* frag_shader) { | 94 | void Create(const char* vert_shader, const char* geo_shader, const char* frag_shader, |
| 95 | const std::vector<const char*>& feedback_vars = {}, | ||
| 96 | bool separable_program = false) { | ||
| 95 | if (handle != 0) | 97 | if (handle != 0) |
| 96 | return; | 98 | return; |
| 97 | handle = GLShader::LoadProgram(vert_shader, frag_shader); | 99 | handle = GLShader::LoadProgram(vert_shader, geo_shader, frag_shader, feedback_vars, |
| 100 | separable_program); | ||
| 98 | } | 101 | } |
| 99 | 102 | ||
| 100 | /// Deletes the internal OpenGL resource | 103 | /// Deletes the internal OpenGL resource |
| @@ -102,7 +105,40 @@ public: | |||
| 102 | if (handle == 0) | 105 | if (handle == 0) |
| 103 | return; | 106 | return; |
| 104 | glDeleteProgram(handle); | 107 | glDeleteProgram(handle); |
| 105 | OpenGLState::ResetProgram(handle); | 108 | OpenGLState::GetCurState().ResetProgram(handle).Apply(); |
| 109 | handle = 0; | ||
| 110 | } | ||
| 111 | |||
| 112 | GLuint handle = 0; | ||
| 113 | }; | ||
| 114 | |||
| 115 | class OGLPipeline : private NonCopyable { | ||
| 116 | public: | ||
| 117 | OGLPipeline() = default; | ||
| 118 | OGLPipeline(OGLPipeline&& o) { | ||
| 119 | handle = std::exchange<GLuint>(o.handle, 0); | ||
| 120 | } | ||
| 121 | ~OGLPipeline() { | ||
| 122 | Release(); | ||
| 123 | } | ||
| 124 | OGLPipeline& operator=(OGLPipeline&& o) { | ||
| 125 | handle = std::exchange<GLuint>(o.handle, 0); | ||
| 126 | return *this; | ||
| 127 | } | ||
| 128 | |||
| 129 | /// Creates a new internal OpenGL resource and stores the handle | ||
| 130 | void Create() { | ||
| 131 | if (handle != 0) | ||
| 132 | return; | ||
| 133 | glGenProgramPipelines(1, &handle); | ||
| 134 | } | ||
| 135 | |||
| 136 | /// Deletes the internal OpenGL resource | ||
| 137 | void Release() { | ||
| 138 | if (handle == 0) | ||
| 139 | return; | ||
| 140 | glDeleteProgramPipelines(1, &handle); | ||
| 141 | OpenGLState::GetCurState().ResetPipeline(handle).Apply(); | ||
| 106 | handle = 0; | 142 | handle = 0; |
| 107 | } | 143 | } |
| 108 | 144 | ||
| @@ -135,13 +171,46 @@ public: | |||
| 135 | if (handle == 0) | 171 | if (handle == 0) |
| 136 | return; | 172 | return; |
| 137 | glDeleteBuffers(1, &handle); | 173 | glDeleteBuffers(1, &handle); |
| 138 | OpenGLState::ResetBuffer(handle); | 174 | OpenGLState::GetCurState().ResetBuffer(handle).Apply(); |
| 139 | handle = 0; | 175 | handle = 0; |
| 140 | } | 176 | } |
| 141 | 177 | ||
| 142 | GLuint handle = 0; | 178 | GLuint handle = 0; |
| 143 | }; | 179 | }; |
| 144 | 180 | ||
| 181 | class OGLSync : private NonCopyable { | ||
| 182 | public: | ||
| 183 | OGLSync() = default; | ||
| 184 | |||
| 185 | OGLSync(OGLSync&& o) : handle(std::exchange(o.handle, nullptr)) {} | ||
| 186 | |||
| 187 | ~OGLSync() { | ||
| 188 | Release(); | ||
| 189 | } | ||
| 190 | OGLSync& operator=(OGLSync&& o) { | ||
| 191 | Release(); | ||
| 192 | handle = std::exchange(o.handle, nullptr); | ||
| 193 | return *this; | ||
| 194 | } | ||
| 195 | |||
| 196 | /// Creates a new internal OpenGL resource and stores the handle | ||
| 197 | void Create() { | ||
| 198 | if (handle != 0) | ||
| 199 | return; | ||
| 200 | handle = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); | ||
| 201 | } | ||
| 202 | |||
| 203 | /// Deletes the internal OpenGL resource | ||
| 204 | void Release() { | ||
| 205 | if (handle == 0) | ||
| 206 | return; | ||
| 207 | glDeleteSync(handle); | ||
| 208 | handle = 0; | ||
| 209 | } | ||
| 210 | |||
| 211 | GLsync handle = 0; | ||
| 212 | }; | ||
| 213 | |||
| 145 | class OGLVertexArray : private NonCopyable { | 214 | class OGLVertexArray : private NonCopyable { |
| 146 | public: | 215 | public: |
| 147 | OGLVertexArray() = default; | 216 | OGLVertexArray() = default; |
| @@ -168,7 +237,7 @@ public: | |||
| 168 | if (handle == 0) | 237 | if (handle == 0) |
| 169 | return; | 238 | return; |
| 170 | glDeleteVertexArrays(1, &handle); | 239 | glDeleteVertexArrays(1, &handle); |
| 171 | OpenGLState::ResetVertexArray(handle); | 240 | OpenGLState::GetCurState().ResetVertexArray(handle).Apply(); |
| 172 | handle = 0; | 241 | handle = 0; |
| 173 | } | 242 | } |
| 174 | 243 | ||
| @@ -201,7 +270,7 @@ public: | |||
| 201 | if (handle == 0) | 270 | if (handle == 0) |
| 202 | return; | 271 | return; |
| 203 | glDeleteFramebuffers(1, &handle); | 272 | glDeleteFramebuffers(1, &handle); |
| 204 | OpenGLState::ResetFramebuffer(handle); | 273 | OpenGLState::GetCurState().ResetFramebuffer(handle).Apply(); |
| 205 | handle = 0; | 274 | handle = 0; |
| 206 | } | 275 | } |
| 207 | 276 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp new file mode 100644 index 000000000..0e0ef18cc --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string> | ||
| 6 | #include <queue> | ||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 10 | |||
| 11 | namespace Maxwell3D { | ||
| 12 | namespace Shader { | ||
| 13 | namespace Decompiler { | ||
| 14 | |||
| 15 | constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; | ||
| 16 | |||
| 17 | class Impl { | ||
| 18 | public: | ||
| 19 | Impl(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, | ||
| 20 | const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, u32 main_offset, | ||
| 21 | const std::function<std::string(u32)>& inputreg_getter, | ||
| 22 | const std::function<std::string(u32)>& outputreg_getter, bool sanitize_mul, | ||
| 23 | const std::string& emit_cb, const std::string& setemit_cb) | ||
| 24 | : program_code(program_code), swizzle_data(swizzle_data), main_offset(main_offset), | ||
| 25 | inputreg_getter(inputreg_getter), outputreg_getter(outputreg_getter), | ||
| 26 | sanitize_mul(sanitize_mul), emit_cb(emit_cb), setemit_cb(setemit_cb) {} | ||
| 27 | |||
| 28 | std::string Decompile() { | ||
| 29 | UNIMPLEMENTED(); | ||
| 30 | return {}; | ||
| 31 | } | ||
| 32 | |||
| 33 | private: | ||
| 34 | const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code; | ||
| 35 | const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data; | ||
| 36 | u32 main_offset; | ||
| 37 | const std::function<std::string(u32)>& inputreg_getter; | ||
| 38 | const std::function<std::string(u32)>& outputreg_getter; | ||
| 39 | bool sanitize_mul; | ||
| 40 | const std::string& emit_cb; | ||
| 41 | const std::string& setemit_cb; | ||
| 42 | }; | ||
| 43 | |||
| 44 | std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, | ||
| 45 | const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, | ||
| 46 | u32 main_offset, | ||
| 47 | const std::function<std::string(u32)>& inputreg_getter, | ||
| 48 | const std::function<std::string(u32)>& outputreg_getter, | ||
| 49 | bool sanitize_mul, const std::string& emit_cb, | ||
| 50 | const std::string& setemit_cb) { | ||
| 51 | Impl impl(program_code, swizzle_data, main_offset, inputreg_getter, outputreg_getter, | ||
| 52 | sanitize_mul, emit_cb, setemit_cb); | ||
| 53 | return impl.Decompile(); | ||
| 54 | } | ||
| 55 | |||
| 56 | } // namespace Decompiler | ||
| 57 | } // namespace Shader | ||
| 58 | } // namespace Maxwell3D | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h new file mode 100644 index 000000000..02ebfcbe8 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -0,0 +1,27 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <functional> | ||
| 7 | #include <string> | ||
| 8 | #include "common/common_types.h" | ||
| 9 | |||
| 10 | namespace Maxwell3D { | ||
| 11 | namespace Shader { | ||
| 12 | namespace Decompiler { | ||
| 13 | |||
| 14 | constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x100000}; | ||
| 15 | constexpr size_t MAX_SWIZZLE_DATA_LENGTH{0x100000}; | ||
| 16 | |||
| 17 | std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, | ||
| 18 | const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, | ||
| 19 | u32 main_offset, | ||
| 20 | const std::function<std::string(u32)>& inputreg_getter, | ||
| 21 | const std::function<std::string(u32)>& outputreg_getter, | ||
| 22 | bool sanitize_mul, const std::string& emit_cb = "", | ||
| 23 | const std::string& setemit_cb = ""); | ||
| 24 | |||
| 25 | } // namespace Decompiler | ||
| 26 | } // namespace Shader | ||
| 27 | } // namespace Maxwell3D | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp new file mode 100644 index 000000000..f242bce1d --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -0,0 +1,20 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||
| 7 | |||
| 8 | namespace GLShader { | ||
| 9 | |||
| 10 | std::string GenerateVertexShader(const MaxwellVSConfig& config) { | ||
| 11 | UNIMPLEMENTED(); | ||
| 12 | return {}; | ||
| 13 | } | ||
| 14 | |||
| 15 | std::string GenerateFragmentShader(const MaxwellFSConfig& config) { | ||
| 16 | UNIMPLEMENTED(); | ||
| 17 | return {}; | ||
| 18 | } | ||
| 19 | |||
| 20 | } // namespace GLShader | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h new file mode 100644 index 000000000..5101e7d30 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstring> | ||
| 8 | #include <string> | ||
| 9 | #include <type_traits> | ||
| 10 | #include "common/hash.h" | ||
| 11 | |||
namespace GLShader {

/// Indices of the vertex attributes, numbered from 0 in declaration order
enum Attributes {
    ATTRIBUTE_POSITION,
    ATTRIBUTE_COLOR,
    ATTRIBUTE_TEXCOORD0,
    ATTRIBUTE_TEXCOORD1,
    ATTRIBUTE_TEXCOORD2,
    ATTRIBUTE_TEXCOORD0_W,
    ATTRIBUTE_NORMQUAT,
    ATTRIBUTE_VIEW,
};

/// State shared by the vertex and fragment shader configurations (currently none)
struct MaxwellShaderConfigCommon {
    explicit MaxwellShaderConfigCommon() {}
};

/// Configuration key for a generated vertex shader
struct MaxwellVSConfig : MaxwellShaderConfigCommon {
    explicit MaxwellVSConfig() : MaxwellShaderConfigCommon() {}

    /// Byte-wise comparison of the configuration state.
    bool operator==(const MaxwellVSConfig& o) const {
        // A config without data members consists only of an indeterminate padding byte;
        // comparing it would make two default-constructed configs unequal at random.
        if (std::is_empty<MaxwellVSConfig>::value) {
            return true;
        }
        return std::memcmp(this, &o, sizeof(MaxwellVSConfig)) == 0;
    }
};

/// Configuration key for a generated fragment shader
struct MaxwellFSConfig : MaxwellShaderConfigCommon {
    explicit MaxwellFSConfig() : MaxwellShaderConfigCommon() {}

    /// Byte-wise comparison of the configuration state.
    bool operator==(const MaxwellFSConfig& o) const {
        // See MaxwellVSConfig::operator==: never compare the padding byte of a stateless config
        if (std::is_empty<MaxwellFSConfig>::value) {
            return true;
        }
        return std::memcmp(this, &o, sizeof(MaxwellFSConfig)) == 0;
    }
};

/// Returns the vertex shader source for "config"
std::string GenerateVertexShader(const MaxwellVSConfig& config);
/// Returns the fragment shader source for "config"
std::string GenerateFragmentShader(const MaxwellFSConfig& config);

} // namespace GLShader
| 49 | |||
| 50 | namespace std { | ||
| 51 | |||
| 52 | template <> | ||
| 53 | struct hash<GLShader::MaxwellVSConfig> { | ||
| 54 | size_t operator()(const GLShader::MaxwellVSConfig& k) const { | ||
| 55 | return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellVSConfig)); | ||
| 56 | } | ||
| 57 | }; | ||
| 58 | |||
| 59 | template <> | ||
| 60 | struct hash<GLShader::MaxwellFSConfig> { | ||
| 61 | size_t operator()(const GLShader::MaxwellFSConfig& k) const { | ||
| 62 | return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellFSConfig)); | ||
| 63 | } | ||
| 64 | }; | ||
| 65 | |||
| 66 | } // namespace std | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 4da241d83..a3ba16761 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp | |||
| @@ -10,53 +10,85 @@ | |||
| 10 | 10 | ||
| 11 | namespace GLShader { | 11 | namespace GLShader { |
| 12 | 12 | ||
| 13 | GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader) { | 13 | GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, |
| 14 | 14 | const char* fragment_shader, const std::vector<const char*>& feedback_vars, | |
| 15 | bool separable_program) { | ||
| 15 | // Create the shaders | 16 | // Create the shaders |
| 16 | GLuint vertex_shader_id = glCreateShader(GL_VERTEX_SHADER); | 17 | GLuint vertex_shader_id = vertex_shader ? glCreateShader(GL_VERTEX_SHADER) : 0; |
| 17 | GLuint fragment_shader_id = glCreateShader(GL_FRAGMENT_SHADER); | 18 | GLuint geometry_shader_id = geometry_shader ? glCreateShader(GL_GEOMETRY_SHADER) : 0; |
| 19 | GLuint fragment_shader_id = fragment_shader ? glCreateShader(GL_FRAGMENT_SHADER) : 0; | ||
| 18 | 20 | ||
| 19 | GLint result = GL_FALSE; | 21 | GLint result = GL_FALSE; |
| 20 | int info_log_length; | 22 | int info_log_length; |
| 21 | 23 | ||
| 22 | // Compile Vertex Shader | 24 | if (vertex_shader) { |
| 23 | LOG_DEBUG(Render_OpenGL, "Compiling vertex shader..."); | 25 | // Compile Vertex Shader |
| 24 | 26 | LOG_DEBUG(Render_OpenGL, "Compiling vertex shader..."); | |
| 25 | glShaderSource(vertex_shader_id, 1, &vertex_shader, nullptr); | 27 | |
| 26 | glCompileShader(vertex_shader_id); | 28 | glShaderSource(vertex_shader_id, 1, &vertex_shader, nullptr); |
| 27 | 29 | glCompileShader(vertex_shader_id); | |
| 28 | // Check Vertex Shader | 30 | |
| 29 | glGetShaderiv(vertex_shader_id, GL_COMPILE_STATUS, &result); | 31 | // Check Vertex Shader |
| 30 | glGetShaderiv(vertex_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); | 32 | glGetShaderiv(vertex_shader_id, GL_COMPILE_STATUS, &result); |
| 31 | 33 | glGetShaderiv(vertex_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); | |
| 32 | if (info_log_length > 1) { | 34 | |
| 33 | std::vector<char> vertex_shader_error(info_log_length); | 35 | if (info_log_length > 1) { |
| 34 | glGetShaderInfoLog(vertex_shader_id, info_log_length, nullptr, &vertex_shader_error[0]); | 36 | std::vector<char> vertex_shader_error(info_log_length); |
| 35 | if (result == GL_TRUE) { | 37 | glGetShaderInfoLog(vertex_shader_id, info_log_length, nullptr, &vertex_shader_error[0]); |
| 36 | LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]); | 38 | if (result == GL_TRUE) { |
| 37 | } else { | 39 | LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]); |
| 38 | LOG_ERROR(Render_OpenGL, "Error compiling vertex shader:\n%s", &vertex_shader_error[0]); | 40 | } else { |
| 41 | LOG_ERROR(Render_OpenGL, "Error compiling vertex shader:\n%s", | ||
| 42 | &vertex_shader_error[0]); | ||
| 43 | } | ||
| 39 | } | 44 | } |
| 40 | } | 45 | } |
| 41 | 46 | ||
| 42 | // Compile Fragment Shader | 47 | if (geometry_shader) { |
| 43 | LOG_DEBUG(Render_OpenGL, "Compiling fragment shader..."); | 48 | // Compile Geometry Shader |
| 44 | 49 | LOG_DEBUG(Render_OpenGL, "Compiling geometry shader..."); | |
| 45 | glShaderSource(fragment_shader_id, 1, &fragment_shader, nullptr); | 50 | |
| 46 | glCompileShader(fragment_shader_id); | 51 | glShaderSource(geometry_shader_id, 1, &geometry_shader, nullptr); |
| 47 | 52 | glCompileShader(geometry_shader_id); | |
| 48 | // Check Fragment Shader | 53 | |
| 49 | glGetShaderiv(fragment_shader_id, GL_COMPILE_STATUS, &result); | 54 | // Check Geometry Shader |
| 50 | glGetShaderiv(fragment_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); | 55 | glGetShaderiv(geometry_shader_id, GL_COMPILE_STATUS, &result); |
| 56 | glGetShaderiv(geometry_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); | ||
| 57 | |||
| 58 | if (info_log_length > 1) { | ||
| 59 | std::vector<char> geometry_shader_error(info_log_length); | ||
| 60 | glGetShaderInfoLog(geometry_shader_id, info_log_length, nullptr, | ||
| 61 | &geometry_shader_error[0]); | ||
| 62 | if (result == GL_TRUE) { | ||
| 63 | LOG_DEBUG(Render_OpenGL, "%s", &geometry_shader_error[0]); | ||
| 64 | } else { | ||
| 65 | LOG_ERROR(Render_OpenGL, "Error compiling geometry shader:\n%s", | ||
| 66 | &geometry_shader_error[0]); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | } | ||
| 51 | 70 | ||
| 52 | if (info_log_length > 1) { | 71 | if (fragment_shader) { |
| 53 | std::vector<char> fragment_shader_error(info_log_length); | 72 | // Compile Fragment Shader |
| 54 | glGetShaderInfoLog(fragment_shader_id, info_log_length, nullptr, &fragment_shader_error[0]); | 73 | LOG_DEBUG(Render_OpenGL, "Compiling fragment shader..."); |
| 55 | if (result == GL_TRUE) { | 74 | |
| 56 | LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]); | 75 | glShaderSource(fragment_shader_id, 1, &fragment_shader, nullptr); |
| 57 | } else { | 76 | glCompileShader(fragment_shader_id); |
| 58 | LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s", | 77 | |
| 59 | &fragment_shader_error[0]); | 78 | // Check Fragment Shader |
| 79 | glGetShaderiv(fragment_shader_id, GL_COMPILE_STATUS, &result); | ||
| 80 | glGetShaderiv(fragment_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); | ||
| 81 | |||
| 82 | if (info_log_length > 1) { | ||
| 83 | std::vector<char> fragment_shader_error(info_log_length); | ||
| 84 | glGetShaderInfoLog(fragment_shader_id, info_log_length, nullptr, | ||
| 85 | &fragment_shader_error[0]); | ||
| 86 | if (result == GL_TRUE) { | ||
| 87 | LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]); | ||
| 88 | } else { | ||
| 89 | LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s", | ||
| 90 | &fragment_shader_error[0]); | ||
| 91 | } | ||
| 60 | } | 92 | } |
| 61 | } | 93 | } |
| 62 | 94 | ||
| @@ -64,8 +96,25 @@ GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader) { | |||
| 64 | LOG_DEBUG(Render_OpenGL, "Linking program..."); | 96 | LOG_DEBUG(Render_OpenGL, "Linking program..."); |
| 65 | 97 | ||
| 66 | GLuint program_id = glCreateProgram(); | 98 | GLuint program_id = glCreateProgram(); |
| 67 | glAttachShader(program_id, vertex_shader_id); | 99 | if (vertex_shader) { |
| 68 | glAttachShader(program_id, fragment_shader_id); | 100 | glAttachShader(program_id, vertex_shader_id); |
| 101 | } | ||
| 102 | if (geometry_shader) { | ||
| 103 | glAttachShader(program_id, geometry_shader_id); | ||
| 104 | } | ||
| 105 | if (fragment_shader) { | ||
| 106 | glAttachShader(program_id, fragment_shader_id); | ||
| 107 | } | ||
| 108 | |||
| 109 | if (!feedback_vars.empty()) { | ||
| 110 | auto varyings = feedback_vars; | ||
| 111 | glTransformFeedbackVaryings(program_id, static_cast<GLsizei>(feedback_vars.size()), | ||
| 112 | &varyings[0], GL_INTERLEAVED_ATTRIBS); | ||
| 113 | } | ||
| 114 | |||
| 115 | if (separable_program) { | ||
| 116 | glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); | ||
| 117 | } | ||
| 69 | 118 | ||
| 70 | glLinkProgram(program_id); | 119 | glLinkProgram(program_id); |
| 71 | 120 | ||
| @@ -85,13 +134,30 @@ GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader) { | |||
| 85 | 134 | ||
| 86 | // If the program linking failed at least one of the shaders was probably bad | 135 | // If the program linking failed at least one of the shaders was probably bad |
| 87 | if (result == GL_FALSE) { | 136 | if (result == GL_FALSE) { |
| 88 | LOG_ERROR(Render_OpenGL, "Vertex shader:\n%s", vertex_shader); | 137 | if (vertex_shader) { |
| 89 | LOG_ERROR(Render_OpenGL, "Fragment shader:\n%s", fragment_shader); | 138 | LOG_ERROR(Render_OpenGL, "Vertex shader:\n%s", vertex_shader); |
| 139 | } | ||
| 140 | if (geometry_shader) { | ||
| 141 | LOG_ERROR(Render_OpenGL, "Geometry shader:\n%s", geometry_shader); | ||
| 142 | } | ||
| 143 | if (fragment_shader) { | ||
| 144 | LOG_ERROR(Render_OpenGL, "Fragment shader:\n%s", fragment_shader); | ||
| 145 | } | ||
| 90 | } | 146 | } |
| 91 | ASSERT_MSG(result == GL_TRUE, "Shader not linked"); | 147 | ASSERT_MSG(result == GL_TRUE, "Shader not linked"); |
| 92 | 148 | ||
| 93 | glDeleteShader(vertex_shader_id); | 149 | if (vertex_shader) { |
| 94 | glDeleteShader(fragment_shader_id); | 150 | glDetachShader(program_id, vertex_shader_id); |
| 151 | glDeleteShader(vertex_shader_id); | ||
| 152 | } | ||
| 153 | if (geometry_shader) { | ||
| 154 | glDetachShader(program_id, geometry_shader_id); | ||
| 155 | glDeleteShader(geometry_shader_id); | ||
| 156 | } | ||
| 157 | if (fragment_shader) { | ||
| 158 | glDetachShader(program_id, fragment_shader_id); | ||
| 159 | glDeleteShader(fragment_shader_id); | ||
| 160 | } | ||
| 95 | 161 | ||
| 96 | return program_id; | 162 | return program_id; |
| 97 | } | 163 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index a4bcffdfa..fc7b5e080 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <vector> | ||
| 7 | #include <glad/glad.h> | 8 | #include <glad/glad.h> |
| 8 | 9 | ||
| 9 | namespace GLShader { | 10 | namespace GLShader { |
| @@ -11,9 +12,12 @@ namespace GLShader { | |||
| 11 | /** | 12 | /** |
| 12 | * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) | 13 | * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) |
| 13 | * @param vertex_shader String of the GLSL vertex shader program | 14 | * @param vertex_shader String of the GLSL vertex shader program |
| 15 | * @param geometry_shader String of the GLSL geometry shader program | ||
| 14 | * @param fragment_shader String of the GLSL fragment shader program | 16 | * @param fragment_shader String of the GLSL fragment shader program |
| 15 | * @returns Handle of the newly created OpenGL shader object | 17 | * @returns Handle of the newly created OpenGL shader object |
| 16 | */ | 18 | */ |
| 17 | GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader); | 19 | GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, |
| 20 | const char* fragment_shader, const std::vector<const char*>& feedback_vars = {}, | ||
| 21 | bool separable_program = false); | ||
| 18 | 22 | ||
| 19 | } // namespace GLShader | 23 | } // namespace GLShader |
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 5770ae08f..1d396728b 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -33,7 +33,7 @@ OpenGLState::OpenGLState() { | |||
| 33 | stencil.action_depth_pass = GL_KEEP; | 33 | stencil.action_depth_pass = GL_KEEP; |
| 34 | stencil.action_stencil_fail = GL_KEEP; | 34 | stencil.action_stencil_fail = GL_KEEP; |
| 35 | 35 | ||
| 36 | blend.enabled = false; | 36 | blend.enabled = true; |
| 37 | blend.rgb_equation = GL_FUNC_ADD; | 37 | blend.rgb_equation = GL_FUNC_ADD; |
| 38 | blend.a_equation = GL_FUNC_ADD; | 38 | blend.a_equation = GL_FUNC_ADD; |
| 39 | blend.src_rgb_func = GL_ONE; | 39 | blend.src_rgb_func = GL_ONE; |
| @@ -68,6 +68,18 @@ OpenGLState::OpenGLState() { | |||
| 68 | draw.vertex_buffer = 0; | 68 | draw.vertex_buffer = 0; |
| 69 | draw.uniform_buffer = 0; | 69 | draw.uniform_buffer = 0; |
| 70 | draw.shader_program = 0; | 70 | draw.shader_program = 0; |
| 71 | draw.program_pipeline = 0; | ||
| 72 | |||
| 73 | scissor.enabled = false; | ||
| 74 | scissor.x = 0; | ||
| 75 | scissor.y = 0; | ||
| 76 | scissor.width = 0; | ||
| 77 | scissor.height = 0; | ||
| 78 | |||
| 79 | viewport.x = 0; | ||
| 80 | viewport.y = 0; | ||
| 81 | viewport.width = 0; | ||
| 82 | viewport.height = 0; | ||
| 71 | 83 | ||
| 72 | clip_distance = {}; | 84 | clip_distance = {}; |
| 73 | } | 85 | } |
| @@ -148,9 +160,6 @@ void OpenGLState::Apply() const { | |||
| 148 | if (blend.enabled != cur_state.blend.enabled) { | 160 | if (blend.enabled != cur_state.blend.enabled) { |
| 149 | if (blend.enabled) { | 161 | if (blend.enabled) { |
| 150 | glEnable(GL_BLEND); | 162 | glEnable(GL_BLEND); |
| 151 | |||
| 152 | cur_state.logic_op = GL_COPY; | ||
| 153 | glLogicOp(cur_state.logic_op); | ||
| 154 | glDisable(GL_COLOR_LOGIC_OP); | 163 | glDisable(GL_COLOR_LOGIC_OP); |
| 155 | } else { | 164 | } else { |
| 156 | glDisable(GL_BLEND); | 165 | glDisable(GL_BLEND); |
| @@ -196,7 +205,7 @@ void OpenGLState::Apply() const { | |||
| 196 | // Lighting LUTs | 205 | // Lighting LUTs |
| 197 | if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) { | 206 | if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) { |
| 198 | glActiveTexture(TextureUnits::LightingLUT.Enum()); | 207 | glActiveTexture(TextureUnits::LightingLUT.Enum()); |
| 199 | glBindTexture(GL_TEXTURE_BUFFER, cur_state.lighting_lut.texture_buffer); | 208 | glBindTexture(GL_TEXTURE_BUFFER, lighting_lut.texture_buffer); |
| 200 | } | 209 | } |
| 201 | 210 | ||
| 202 | // Fog LUT | 211 | // Fog LUT |
| @@ -263,6 +272,31 @@ void OpenGLState::Apply() const { | |||
| 263 | glUseProgram(draw.shader_program); | 272 | glUseProgram(draw.shader_program); |
| 264 | } | 273 | } |
| 265 | 274 | ||
| 275 | // Program pipeline | ||
| 276 | if (draw.program_pipeline != cur_state.draw.program_pipeline) { | ||
| 277 | glBindProgramPipeline(draw.program_pipeline); | ||
| 278 | } | ||
| 279 | |||
| 280 | // Scissor test | ||
| 281 | if (scissor.enabled != cur_state.scissor.enabled) { | ||
| 282 | if (scissor.enabled) { | ||
| 283 | glEnable(GL_SCISSOR_TEST); | ||
| 284 | } else { | ||
| 285 | glDisable(GL_SCISSOR_TEST); | ||
| 286 | } | ||
| 287 | } | ||
| 288 | |||
| 289 | if (scissor.x != cur_state.scissor.x || scissor.y != cur_state.scissor.y || | ||
| 290 | scissor.width != cur_state.scissor.width || scissor.height != cur_state.scissor.height) { | ||
| 291 | glScissor(scissor.x, scissor.y, scissor.width, scissor.height); | ||
| 292 | } | ||
| 293 | |||
| 294 | if (viewport.x != cur_state.viewport.x || viewport.y != cur_state.viewport.y || | ||
| 295 | viewport.width != cur_state.viewport.width || | ||
| 296 | viewport.height != cur_state.viewport.height) { | ||
| 297 | glViewport(viewport.x, viewport.y, viewport.width, viewport.height); | ||
| 298 | } | ||
| 299 | |||
| 266 | // Clip distance | 300 | // Clip distance |
| 267 | for (size_t i = 0; i < clip_distance.size(); ++i) { | 301 | for (size_t i = 0; i < clip_distance.size(); ++i) { |
| 268 | if (clip_distance[i] != cur_state.clip_distance[i]) { | 302 | if (clip_distance[i] != cur_state.clip_distance[i]) { |
| @@ -277,62 +311,75 @@ void OpenGLState::Apply() const { | |||
| 277 | cur_state = *this; | 311 | cur_state = *this; |
| 278 | } | 312 | } |
| 279 | 313 | ||
| 280 | void OpenGLState::ResetTexture(GLuint handle) { | 314 | OpenGLState& OpenGLState::ResetTexture(GLuint handle) { |
| 281 | for (auto& unit : cur_state.texture_units) { | 315 | for (auto& unit : texture_units) { |
| 282 | if (unit.texture_2d == handle) { | 316 | if (unit.texture_2d == handle) { |
| 283 | unit.texture_2d = 0; | 317 | unit.texture_2d = 0; |
| 284 | } | 318 | } |
| 285 | } | 319 | } |
| 286 | if (cur_state.lighting_lut.texture_buffer == handle) | 320 | if (lighting_lut.texture_buffer == handle) |
| 287 | cur_state.lighting_lut.texture_buffer = 0; | 321 | lighting_lut.texture_buffer = 0; |
| 288 | if (cur_state.fog_lut.texture_buffer == handle) | 322 | if (fog_lut.texture_buffer == handle) |
| 289 | cur_state.fog_lut.texture_buffer = 0; | 323 | fog_lut.texture_buffer = 0; |
| 290 | if (cur_state.proctex_noise_lut.texture_buffer == handle) | 324 | if (proctex_noise_lut.texture_buffer == handle) |
| 291 | cur_state.proctex_noise_lut.texture_buffer = 0; | 325 | proctex_noise_lut.texture_buffer = 0; |
| 292 | if (cur_state.proctex_color_map.texture_buffer == handle) | 326 | if (proctex_color_map.texture_buffer == handle) |
| 293 | cur_state.proctex_color_map.texture_buffer = 0; | 327 | proctex_color_map.texture_buffer = 0; |
| 294 | if (cur_state.proctex_alpha_map.texture_buffer == handle) | 328 | if (proctex_alpha_map.texture_buffer == handle) |
| 295 | cur_state.proctex_alpha_map.texture_buffer = 0; | 329 | proctex_alpha_map.texture_buffer = 0; |
| 296 | if (cur_state.proctex_lut.texture_buffer == handle) | 330 | if (proctex_lut.texture_buffer == handle) |
| 297 | cur_state.proctex_lut.texture_buffer = 0; | 331 | proctex_lut.texture_buffer = 0; |
| 298 | if (cur_state.proctex_diff_lut.texture_buffer == handle) | 332 | if (proctex_diff_lut.texture_buffer == handle) |
| 299 | cur_state.proctex_diff_lut.texture_buffer = 0; | 333 | proctex_diff_lut.texture_buffer = 0; |
| 334 | return *this; | ||
| 300 | } | 335 | } |
| 301 | 336 | ||
| 302 | void OpenGLState::ResetSampler(GLuint handle) { | 337 | OpenGLState& OpenGLState::ResetSampler(GLuint handle) { |
| 303 | for (auto& unit : cur_state.texture_units) { | 338 | for (auto& unit : texture_units) { |
| 304 | if (unit.sampler == handle) { | 339 | if (unit.sampler == handle) { |
| 305 | unit.sampler = 0; | 340 | unit.sampler = 0; |
| 306 | } | 341 | } |
| 307 | } | 342 | } |
| 343 | return *this; | ||
| 344 | } | ||
| 345 | |||
| 346 | OpenGLState& OpenGLState::ResetProgram(GLuint handle) { | ||
| 347 | if (draw.shader_program == handle) { | ||
| 348 | draw.shader_program = 0; | ||
| 349 | } | ||
| 350 | return *this; | ||
| 308 | } | 351 | } |
| 309 | 352 | ||
| 310 | void OpenGLState::ResetProgram(GLuint handle) { | 353 | OpenGLState& OpenGLState::ResetPipeline(GLuint handle) { |
| 311 | if (cur_state.draw.shader_program == handle) { | 354 | if (draw.program_pipeline == handle) { |
| 312 | cur_state.draw.shader_program = 0; | 355 | draw.program_pipeline = 0; |
| 313 | } | 356 | } |
| 357 | return *this; | ||
| 314 | } | 358 | } |
| 315 | 359 | ||
| 316 | void OpenGLState::ResetBuffer(GLuint handle) { | 360 | OpenGLState& OpenGLState::ResetBuffer(GLuint handle) { |
| 317 | if (cur_state.draw.vertex_buffer == handle) { | 361 | if (draw.vertex_buffer == handle) { |
| 318 | cur_state.draw.vertex_buffer = 0; | 362 | draw.vertex_buffer = 0; |
| 319 | } | 363 | } |
| 320 | if (cur_state.draw.uniform_buffer == handle) { | 364 | if (draw.uniform_buffer == handle) { |
| 321 | cur_state.draw.uniform_buffer = 0; | 365 | draw.uniform_buffer = 0; |
| 322 | } | 366 | } |
| 367 | return *this; | ||
| 323 | } | 368 | } |
| 324 | 369 | ||
| 325 | void OpenGLState::ResetVertexArray(GLuint handle) { | 370 | OpenGLState& OpenGLState::ResetVertexArray(GLuint handle) { |
| 326 | if (cur_state.draw.vertex_array == handle) { | 371 | if (draw.vertex_array == handle) { |
| 327 | cur_state.draw.vertex_array = 0; | 372 | draw.vertex_array = 0; |
| 328 | } | 373 | } |
| 374 | return *this; | ||
| 329 | } | 375 | } |
| 330 | 376 | ||
| 331 | void OpenGLState::ResetFramebuffer(GLuint handle) { | 377 | OpenGLState& OpenGLState::ResetFramebuffer(GLuint handle) { |
| 332 | if (cur_state.draw.read_framebuffer == handle) { | 378 | if (draw.read_framebuffer == handle) { |
| 333 | cur_state.draw.read_framebuffer = 0; | 379 | draw.read_framebuffer = 0; |
| 334 | } | 380 | } |
| 335 | if (cur_state.draw.draw_framebuffer == handle) { | 381 | if (draw.draw_framebuffer == handle) { |
| 336 | cur_state.draw.draw_framebuffer = 0; | 382 | draw.draw_framebuffer = 0; |
| 337 | } | 383 | } |
| 384 | return *this; | ||
| 338 | } | 385 | } |
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 437fe34c4..940575dfa 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -122,27 +122,44 @@ public: | |||
| 122 | GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING | 122 | GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING |
| 123 | GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING | 123 | GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING |
| 124 | GLuint shader_program; // GL_CURRENT_PROGRAM | 124 | GLuint shader_program; // GL_CURRENT_PROGRAM |
| 125 | GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING | ||
| 125 | } draw; | 126 | } draw; |
| 126 | 127 | ||
| 128 | struct { | ||
| 129 | bool enabled; // GL_SCISSOR_TEST | ||
| 130 | GLint x; | ||
| 131 | GLint y; | ||
| 132 | GLsizei width; | ||
| 133 | GLsizei height; | ||
| 134 | } scissor; | ||
| 135 | |||
| 136 | struct { | ||
| 137 | GLint x; | ||
| 138 | GLint y; | ||
| 139 | GLsizei width; | ||
| 140 | GLsizei height; | ||
| 141 | } viewport; | ||
| 142 | |||
| 127 | std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE | 143 | std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE |
| 128 | 144 | ||
| 129 | OpenGLState(); | 145 | OpenGLState(); |
| 130 | 146 | ||
| 131 | /// Get the currently active OpenGL state | 147 | /// Get the currently active OpenGL state |
| 132 | static const OpenGLState& GetCurState() { | 148 | static OpenGLState GetCurState() { |
| 133 | return cur_state; | 149 | return cur_state; |
| 134 | } | 150 | } |
| 135 | 151 | ||
| 136 | /// Apply this state as the current OpenGL state | 152 | /// Apply this state as the current OpenGL state |
| 137 | void Apply() const; | 153 | void Apply() const; |
| 138 | 154 | ||
| 139 | /// Resets and unbinds any references to the given resource in the current OpenGL state | 155 | /// Resets any references to the given resource |
| 140 | static void ResetTexture(GLuint handle); | 156 | OpenGLState& ResetTexture(GLuint handle); |
| 141 | static void ResetSampler(GLuint handle); | 157 | OpenGLState& ResetSampler(GLuint handle); |
| 142 | static void ResetProgram(GLuint handle); | 158 | OpenGLState& ResetProgram(GLuint handle); |
| 143 | static void ResetBuffer(GLuint handle); | 159 | OpenGLState& ResetPipeline(GLuint handle); |
| 144 | static void ResetVertexArray(GLuint handle); | 160 | OpenGLState& ResetBuffer(GLuint handle); |
| 145 | static void ResetFramebuffer(GLuint handle); | 161 | OpenGLState& ResetVertexArray(GLuint handle); |
| 162 | OpenGLState& ResetFramebuffer(GLuint handle); | ||
| 146 | 163 | ||
| 147 | private: | 164 | private: |
| 148 | static OpenGLState cur_state; | 165 | static OpenGLState cur_state; |
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp new file mode 100644 index 000000000..a2713e9f0 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp | |||
| @@ -0,0 +1,182 @@ | |||
| 1 | // Copyright 2018 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <deque> | ||
| 6 | #include <vector> | ||
| 7 | #include "common/alignment.h" | ||
| 8 | #include "common/assert.h" | ||
| 9 | #include "video_core/renderer_opengl/gl_state.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | ||
| 11 | |||
| 12 | /** Stream buffer variant that hands out CPU-side staging memory from Map() and uploads it with glBufferSubData in Unmap(). Returned by OGLStreamBuffer::MakeBuffer when storage_buffer is false. */ class OrphanBuffer : public OGLStreamBuffer { | ||
| 13 | public: | ||
| 14 | explicit OrphanBuffer(GLenum target) : OGLStreamBuffer(target) {} | ||
| 15 | ~OrphanBuffer() override; | ||
| 16 | ||
| 17 | private: | ||
| 18 | /* The overrides below are private here but public in OGLStreamBuffer, so they are reached through a base-class pointer. */ void Create(size_t size, size_t sync_subdivide) override; | ||
| 19 | void Release() override; | ||
| 20 | ||
| 21 | std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override; | ||
| 22 | void Unmap() override; | ||
| 23 | ||
| 24 | /* CPU staging storage; resized to buffer_size in Create(). */ std::vector<u8> data; | ||
| 25 | }; | ||
| 26 | |||
| 27 | /** Stream buffer variant backed by immutable storage (glBufferStorage) that stays mapped; Map() hands out pointers into that mapping and uses sync objects to avoid overwriting regions still in use. Returned by OGLStreamBuffer::MakeBuffer when storage_buffer is true. */ class StorageBuffer : public OGLStreamBuffer { | ||
| 28 | public: | ||
| 29 | explicit StorageBuffer(GLenum target) : OGLStreamBuffer(target) {} | ||
| 30 | ~StorageBuffer() override; | ||
| 31 | ||
| 32 | private: | ||
| 33 | void Create(size_t size, size_t sync_subdivide) override; | ||
| 34 | void Release() override; | ||
| 35 | ||
| 36 | std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override; | ||
| 37 | void Unmap() override; | ||
| 38 | ||
| 39 | /* A sync object paired with the buffer offset of the region it guards. */ struct Fence { | ||
| 40 | OGLSync sync; | ||
| 41 | size_t offset; | ||
| 42 | }; | ||
| 43 | /* Fences for regions written since the last wrap-around. */ std::deque<Fence> head; | ||
| 44 | /* Fences carried over from before the last wrap-around; consumed by Map() as it advances. */ std::deque<Fence> tail; | ||
| 45 | ||
| 46 | /* Base address returned by glMapBufferRange in Create(); null until then so it never holds an indeterminate value. */ u8* mapped_ptr = nullptr; | ||
| 47 | }; | ||
| 48 | |||
| 49 | /** Records the GL buffer target (e.g. the value later passed to glBindBuffer) that every subsequent GL call of this stream buffer operates on. */ OGLStreamBuffer::OGLStreamBuffer(GLenum target) | ||
| 50 | : gl_target(target) { | ||
| 51 | } | ||
| 52 | |||
| 53 | /** Returns the handle stored in gl_buffer; this is 0 until a Create() implementation has created the buffer (both Create() overrides test it against 0). */ GLuint OGLStreamBuffer::GetHandle() const { | ||
| 54 | return gl_buffer.handle; | ||
| 55 | } | ||
| 56 | |||
| 57 | /** Factory for the two stream buffer implementations: a StorageBuffer when storage_buffer is true, otherwise an OrphanBuffer. The returned object still needs Create() before it can be mapped. */ std::unique_ptr<OGLStreamBuffer> OGLStreamBuffer::MakeBuffer(bool storage_buffer, GLenum target) { | ||
| 58 | if (!storage_buffer) { | ||
| 59 | return std::make_unique<OrphanBuffer>(target); | ||
| 60 | } | ||
| 61 | return std::make_unique<StorageBuffer>(target); | ||
| 62 | } | ||
| 63 | |||
| 64 | /** Releases the GL buffer on destruction by delegating to Release(). */ OrphanBuffer::~OrphanBuffer() { | ||
| 65 | Release(); | ||
| 66 | } | ||
| 67 | |||
| 68 | /** (Re)allocates the buffer with `size` bytes and rewinds the write position to 0. The second parameter is unused by this implementation. Also called from Map() when the requested range does not fit. */ void OrphanBuffer::Create(size_t size, size_t /*sync_subdivide*/) { | ||
| 69 | buffer_pos = 0; | ||
| 70 | buffer_size = size; | ||
| 71 | data.resize(buffer_size); | ||
| 72 | ||
| 73 | /* NOTE(review): the buffer is bound only when it is first created; on later calls glBufferData below affects whatever is bound to gl_target - presumably the caller keeps this buffer bound, confirm. */ if (gl_buffer.handle == 0) { | ||
| 74 | gl_buffer.Create(); | ||
| 75 | glBindBuffer(gl_target, gl_buffer.handle); | ||
| 76 | } | ||
| 77 | ||
| 78 | /* Null data pointer: allocates fresh storage of buffer_size bytes without uploading anything. */ glBufferData(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, GL_STREAM_DRAW); | ||
| 79 | } | ||
| 80 | |||
| 81 | /** Releases the GL buffer object via gl_buffer.Release(); the CPU staging vector and the size/position fields are left untouched. */ void OrphanBuffer::Release() { | ||
| 82 | gl_buffer.Release(); | ||
| 83 | } | ||
| 84 | |||
| 85 | /** Reserves `size` bytes of CPU staging memory at the next position aligned to `alignment`. Returns the pointer to write into and the byte offset inside the GL buffer that the data will occupy after Unmap(). If the range does not fit, the buffer is re-created (at least as large as before) and writing restarts at offset 0. */ std::pair<u8*, GLintptr> OrphanBuffer::Map(size_t size, size_t alignment) { | ||
| 86 | buffer_pos = Common::AlignUp(buffer_pos, alignment); | ||
| 87 | ||
| 88 | if (buffer_pos + size > buffer_size) { | ||
| 89 | /* Create() resets buffer_pos to 0 and resizes the staging vector. */ Create(std::max(buffer_size, size), 0); | ||
| 90 | } | ||
| 91 | ||
| 92 | mapped_size = size; | ||
| 93 | /* Pointer arithmetic on data() instead of &data[buffer_pos]: operator[] is out of range when buffer_pos == data.size() (zero-size mapping at the very end, or an empty buffer). */ return std::make_pair(data.data() + buffer_pos, static_cast<GLintptr>(buffer_pos)); | ||
| 94 | } | ||
| 95 | |||
| 96 | /** Uploads the range reserved by the preceding Map() from the staging vector into the buffer bound to gl_target, then advances the write position past it. */ void OrphanBuffer::Unmap() { | ||
| 97 | /* data() + offset rather than &data[buffer_pos], which is out of range when buffer_pos == data.size() and mapped_size is 0. */ glBufferSubData(gl_target, static_cast<GLintptr>(buffer_pos), | ||
| 98 | static_cast<GLsizeiptr>(mapped_size), data.data() + buffer_pos); | ||
| 99 | buffer_pos += mapped_size; | ||
| 100 | } | ||
| 101 | |||
| 102 | /** Unmaps and releases the GL buffer on destruction by delegating to Release(). */ StorageBuffer::~StorageBuffer() { | ||
| 103 | Release(); | ||
| 104 | } | ||
| 105 | |||
| 106 | /** Creates the buffer with `size` bytes of immutable storage and maps the whole range once. `sync_subdivide` is the granularity (in bytes) at which Map() tracks fences; values below 1 are raised to 1. Does nothing if the buffer already exists, so unlike OrphanBuffer::Create it cannot be used to resize. */ void StorageBuffer::Create(size_t size, size_t sync_subdivide) { | ||
| 107 | if (gl_buffer.handle != 0) | ||
| 108 | return; | ||
| 109 | ||
| 110 | buffer_pos = 0; | ||
| 111 | buffer_size = size; | ||
| 112 | /* Clamped to at least 1 because Map() passes it to Common::AlignDown. */ buffer_sync_subdivide = std::max<size_t>(sync_subdivide, 1); | ||
| 113 | ||
| 114 | gl_buffer.Create(); | ||
| 115 | glBindBuffer(gl_target, gl_buffer.handle); | ||
| 116 | ||
| 117 | glBufferStorage(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, | ||
| 118 | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT); | ||
| 119 | /* GL_MAP_FLUSH_EXPLICIT_BIT: written ranges are flushed by Unmap() via glFlushMappedBufferRange. NOTE(review): the returned pointer is not checked for null. */ mapped_ptr = reinterpret_cast<u8*>( | ||
| 120 | glMapBufferRange(gl_target, 0, static_cast<GLsizeiptr>(buffer_size), | ||
| 121 | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT)); | ||
| 122 | } | ||
| 123 | |||
| 124 | /** Unmaps and releases the GL buffer and drops all pending fences. Does nothing if the buffer was never created. */ void StorageBuffer::Release() { | ||
| 125 | if (gl_buffer.handle == 0) | ||
| 126 | return; | ||
| 127 | /* glUnmapBuffer acts on whatever buffer is bound to the target, so bind ours first (Create() binds it the same way) instead of unmapping an unrelated buffer. */ glBindBuffer(gl_target, gl_buffer.handle); | ||
| 128 | glUnmapBuffer(gl_target); | ||
| 129 | /* The mapping is gone; do not keep a dangling pointer around. */ mapped_ptr = nullptr; | ||
| 130 | gl_buffer.Release(); | ||
| 131 | head.clear(); | ||
| 132 | tail.clear(); | ||
| 133 | } | ||
| 134 | |||
| 135 | /** Reserves `size` bytes at the next position aligned to `alignment` inside the mapped buffer and returns the write pointer plus the byte offset. Wraps to offset 0 when the range does not fit, and waits on a sync object before handing out a region that an older fence still covers. `size` must not exceed buffer_size. */ std::pair<u8*, GLintptr> StorageBuffer::Map(size_t size, size_t alignment) { | ||
| 136 | ASSERT(size <= buffer_size); | ||
| 137 | ||
| 138 | OGLSync sync; | ||
| 139 | ||
| 140 | buffer_pos = Common::AlignUp(buffer_pos, alignment); | ||
| 141 | /* Start of the sync subdivision that contains the aligned write position. */ size_t effective_offset = Common::AlignDown(buffer_pos, buffer_sync_subdivide); | ||
| 142 | ||
| 143 | /* Leaving the newest subdivision (moving past it or wrapping around): create its sync object now. */ if (!head.empty() && | ||
| 144 | (effective_offset > head.back().offset || buffer_pos + size > buffer_size)) { | ||
| 145 | ASSERT(head.back().sync.handle == 0); | ||
| 146 | head.back().sync.Create(); | ||
| 147 | } | ||
| 148 | ||
| 149 | /* Wrap-around: keep only the last sync of the old tail in `sync`, turn the current head into the new tail and restart at offset 0. */ if (buffer_pos + size > buffer_size) { | ||
| 150 | if (!tail.empty()) { | ||
| 151 | std::swap(sync, tail.back().sync); | ||
| 152 | tail.clear(); | ||
| 153 | } | ||
| 154 | std::swap(tail, head); | ||
| 155 | buffer_pos = 0; | ||
| 156 | effective_offset = 0; | ||
| 157 | } | ||
| 158 | ||
| 159 | /* Pop every tail fence whose region starts before the end of the new range; `sync` ends up holding the sync of the last one popped. */ while (!tail.empty() && buffer_pos + size > tail.front().offset) { | ||
| 160 | std::swap(sync, tail.front().sync); | ||
| 161 | tail.pop_front(); | ||
| 162 | } | ||
| 163 | ||
| 164 | /* Block (no timeout) until the collected sync object is signalled, then free it. */ if (sync.handle != 0) { | ||
| 165 | glClientWaitSync(sync.handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); | ||
| 166 | sync.Release(); | ||
| 167 | } | ||
| 168 | ||
| 169 | /* First write into a new subdivision: start a fence entry for it; its sync object is created later, when the subdivision is left. */ if (head.empty() || effective_offset > head.back().offset) { | ||
| 170 | head.emplace_back(); | ||
| 171 | head.back().offset = effective_offset; | ||
| 172 | } | ||
| 173 | ||
| 174 | mapped_size = size; | ||
| 175 | return std::make_pair(&mapped_ptr[buffer_pos], static_cast<GLintptr>(buffer_pos)); | ||
| 176 | } | ||
| 177 | |||
| 178 | /** Flushes the range reserved by the preceding Map() (the mapping was created with GL_MAP_FLUSH_EXPLICIT_BIT) and advances the write position past it. The buffer itself stays mapped. */ void StorageBuffer::Unmap() { | ||
| 179 | glFlushMappedBufferRange(gl_target, static_cast<GLintptr>(buffer_pos), | ||
| 180 | static_cast<GLsizeiptr>(mapped_size)); | ||
| 181 | buffer_pos += mapped_size; | ||
| 182 | } | ||
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h new file mode 100644 index 000000000..4bc2f52e0 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h | |||
| @@ -0,0 +1,34 @@ | |||
| 1 | // Copyright 2018 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| | #pragma once | ||
| | ||
| 5 | #include <memory> | ||
| | #include <utility> | ||
| 6 | #include <glad/glad.h> | ||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 9 | |||
| 10 | /** Abstract interface for a GL buffer that is filled incrementally: Create() allocates it, Map() reserves a writable range and Unmap() commits it. Concrete implementations are obtained from MakeBuffer(). */ class OGLStreamBuffer : private NonCopyable { | ||
| 11 | public: | ||
| 12 | explicit OGLStreamBuffer(GLenum target); | ||
| 13 | virtual ~OGLStreamBuffer() = default; | ||
| 14 | ||
| 15 | public: | ||
| 16 | /* Returns one of two implementations, selected by storage_buffer. */ static std::unique_ptr<OGLStreamBuffer> MakeBuffer(bool storage_buffer, GLenum target); | ||
| 17 | ||
| 18 | /* Allocates `size` bytes; sync_subdivide is implementation-specific and may be ignored. */ virtual void Create(size_t size, size_t sync_subdivide) = 0; | ||
| 19 | /* No-op by default. */ virtual void Release() {} | ||
| 20 | ||
| 21 | GLuint GetHandle() const; | ||
| 22 | ||
| 23 | /* Returns a write pointer for `size` bytes and the matching byte offset in the GL buffer. */ virtual std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) = 0; | ||
| 24 | /* Commits the range handed out by the last Map(). */ virtual void Unmap() = 0; | ||
| 25 | ||
| 26 | protected: | ||
| 27 | OGLBuffer gl_buffer; | ||
| 28 | /* Set by the constructor. */ GLenum gl_target; | ||
| 29 | ||
| 30 | /* Current write offset in bytes. */ size_t buffer_pos = 0; | ||
| 31 | /* Allocated size in bytes. */ size_t buffer_size = 0; | ||
| 32 | size_t buffer_sync_subdivide = 0; | ||
| 33 | /* Size of the range handed out by the last Map(). */ size_t mapped_size = 0; | ||
| 34 | }; | ||
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 7f921fa32..65d38ade5 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -318,7 +318,7 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 318 | 0.0f); | 318 | 0.0f); |
| 319 | 319 | ||
| 320 | // Link shaders and get variable locations | 320 | // Link shaders and get variable locations |
| 321 | shader.Create(vertex_shader, fragment_shader); | 321 | shader.Create(vertex_shader, nullptr, fragment_shader); |
| 322 | state.draw.shader_program = shader.handle; | 322 | state.draw.shader_program = shader.handle; |
| 323 | state.Apply(); | 323 | state.Apply(); |
| 324 | uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); | 324 | uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); |