diff options
| -rw-r--r-- | src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 180 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_graphics_pipeline.h | 18 |
2 files changed, 141 insertions, 57 deletions
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 8d11fbc55..6b62fa1da 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | |||
| @@ -15,6 +15,12 @@ | |||
| 15 | #include "video_core/shader_notify.h" | 15 | #include "video_core/shader_notify.h" |
| 16 | #include "video_core/texture_cache/texture_cache.h" | 16 | #include "video_core/texture_cache/texture_cache.h" |
| 17 | 17 | ||
| 18 | #if defined(_MSC_VER) && defined(NDEBUG) | ||
| 19 | #define LAMBDA_FORCEINLINE [[msvc::forceinline]] | ||
| 20 | #else | ||
| 21 | #define LAMBDA_FORCEINLINE | ||
| 22 | #endif | ||
| 23 | |||
| 18 | namespace OpenGL { | 24 | namespace OpenGL { |
| 19 | namespace { | 25 | namespace { |
| 20 | using Shader::ImageBufferDescriptor; | 26 | using Shader::ImageBufferDescriptor; |
| @@ -98,13 +104,76 @@ std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) { | |||
| 98 | return {GL_POSITION, 0}; | 104 | return {GL_POSITION, 0}; |
| 99 | } | 105 | } |
| 100 | 106 | ||
| 101 | struct Spec { | 107 | template <typename Spec> |
| 108 | bool Passes(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) { | ||
| 109 | for (size_t stage = 0; stage < stage_infos.size(); ++stage) { | ||
| 110 | if (!Spec::enabled_stages[stage] && ((enabled_mask >> stage) & 1) != 0) { | ||
| 111 | return false; | ||
| 112 | } | ||
| 113 | const auto& info{stage_infos[stage]}; | ||
| 114 | if constexpr (!Spec::has_storage_buffers) { | ||
| 115 | if (!info.storage_buffers_descriptors.empty()) { | ||
| 116 | return false; | ||
| 117 | } | ||
| 118 | } | ||
| 119 | if constexpr (!Spec::has_texture_buffers) { | ||
| 120 | if (!info.texture_buffer_descriptors.empty()) { | ||
| 121 | return false; | ||
| 122 | } | ||
| 123 | } | ||
| 124 | if constexpr (!Spec::has_image_buffers) { | ||
| 125 | if (!info.image_buffer_descriptors.empty()) { | ||
| 126 | return false; | ||
| 127 | } | ||
| 128 | } | ||
| 129 | if constexpr (!Spec::has_images) { | ||
| 130 | if (!info.image_descriptors.empty()) { | ||
| 131 | return false; | ||
| 132 | } | ||
| 133 | } | ||
| 134 | } | ||
| 135 | return true; | ||
| 136 | } | ||
| 137 | |||
| 138 | using ConfigureFuncPtr = void (*)(GraphicsPipeline*, bool); | ||
| 139 | |||
| 140 | template <typename Spec, typename... Specs> | ||
| 141 | ConfigureFuncPtr FindSpec(const std::array<Shader::Info, 5>& stage_infos, u32 enabled_mask) { | ||
| 142 | if constexpr (sizeof...(Specs) > 0) { | ||
| 143 | if (!Passes<Spec>(stage_infos, enabled_mask)) { | ||
| 144 | return FindSpec<Specs...>(stage_infos, enabled_mask); | ||
| 145 | } | ||
| 146 | } | ||
| 147 | return GraphicsPipeline::MakeConfigureSpecFunc<Spec>(); | ||
| 148 | } | ||
| 149 | |||
| 150 | struct SimpleVertexFragmentSpec { | ||
| 151 | static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, true}; | ||
| 152 | static constexpr bool has_storage_buffers = false; | ||
| 153 | static constexpr bool has_texture_buffers = false; | ||
| 154 | static constexpr bool has_image_buffers = false; | ||
| 155 | static constexpr bool has_images = false; | ||
| 156 | }; | ||
| 157 | |||
| 158 | struct SimpleVertexSpec { | ||
| 159 | static constexpr std::array<bool, 5> enabled_stages{true, false, false, false, false}; | ||
| 160 | static constexpr bool has_storage_buffers = false; | ||
| 161 | static constexpr bool has_texture_buffers = false; | ||
| 162 | static constexpr bool has_image_buffers = false; | ||
| 163 | static constexpr bool has_images = false; | ||
| 164 | }; | ||
| 165 | |||
| 166 | struct DefaultSpec { | ||
| 102 | static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true}; | 167 | static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true}; |
| 103 | static constexpr bool has_storage_buffers = true; | 168 | static constexpr bool has_storage_buffers = true; |
| 104 | static constexpr bool has_texture_buffers = true; | 169 | static constexpr bool has_texture_buffers = true; |
| 105 | static constexpr bool has_image_buffers = true; | 170 | static constexpr bool has_image_buffers = true; |
| 106 | static constexpr bool has_images = true; | 171 | static constexpr bool has_images = true; |
| 107 | }; | 172 | }; |
| 173 | |||
| 174 | ConfigureFuncPtr ConfigureFunc(const std::array<Shader::Info, 5>& infos, u32 enabled_mask) { | ||
| 175 | return FindSpec<SimpleVertexSpec, SimpleVertexFragmentSpec, DefaultSpec>(infos, enabled_mask); | ||
| 176 | } | ||
| 108 | } // Anonymous namespace | 177 | } // Anonymous namespace |
| 109 | 178 | ||
| 110 | size_t GraphicsPipelineKey::Hash() const noexcept { | 179 | size_t GraphicsPipelineKey::Hash() const noexcept { |
| @@ -129,8 +198,52 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c | |||
| 129 | if (shader_notify) { | 198 | if (shader_notify) { |
| 130 | shader_notify->MarkShaderBuilding(); | 199 | shader_notify->MarkShaderBuilding(); |
| 131 | } | 200 | } |
| 132 | std::ranges::transform(infos, stage_infos.begin(), | 201 | u32 num_textures{}; |
| 133 | [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); | 202 | u32 num_images{}; |
| 203 | u32 num_storage_buffers{}; | ||
| 204 | for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { | ||
| 205 | auto& info{stage_infos[stage]}; | ||
| 206 | if (infos[stage]) { | ||
| 207 | info = *infos[stage]; | ||
| 208 | enabled_stages_mask |= 1u << stage; | ||
| 209 | } | ||
| 210 | if (stage < 4) { | ||
| 211 | base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; | ||
| 212 | base_storage_bindings[stage + 1] = base_storage_bindings[stage]; | ||
| 213 | |||
| 214 | base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); | ||
| 215 | base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); | ||
| 216 | } | ||
| 217 | enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; | ||
| 218 | std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); | ||
| 219 | |||
| 220 | const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; | ||
| 221 | num_texture_buffers[stage] += num_tex_buffer_bindings; | ||
| 222 | num_textures += num_tex_buffer_bindings; | ||
| 223 | |||
| 224 | const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; | ||
| 225 | num_image_buffers[stage] += num_img_buffers_bindings; | ||
| 226 | num_images += num_img_buffers_bindings; | ||
| 227 | |||
| 228 | num_textures += AccumulateCount(info.texture_descriptors); | ||
| 229 | num_images += AccumulateCount(info.image_descriptors); | ||
| 230 | num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); | ||
| 231 | |||
| 232 | writes_global_memory |= std::ranges::any_of( | ||
| 233 | info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); | ||
| 234 | } | ||
| 235 | ASSERT(num_textures <= MAX_TEXTURES); | ||
| 236 | ASSERT(num_images <= MAX_IMAGES); | ||
| 237 | |||
| 238 | const bool assembly_shaders{assembly_programs[0].handle != 0}; | ||
| 239 | use_storage_buffers = | ||
| 240 | !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); | ||
| 241 | writes_global_memory &= !use_storage_buffers; | ||
| 242 | configure_func = ConfigureFunc(stage_infos, enabled_stages_mask); | ||
| 243 | |||
| 244 | if (assembly_shaders && xfb_state) { | ||
| 245 | GenerateTransformFeedbackState(*xfb_state); | ||
| 246 | } | ||
| 134 | auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { | 247 | auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { |
| 135 | if (!device.UseAssemblyShaders()) { | 248 | if (!device.UseAssemblyShaders()) { |
| 136 | program.handle = glCreateProgram(); | 249 | program.handle = glCreateProgram(); |
| @@ -142,7 +255,6 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c | |||
| 142 | } | 255 | } |
| 143 | if (device.UseAssemblyShaders()) { | 256 | if (device.UseAssemblyShaders()) { |
| 144 | assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); | 257 | assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); |
| 145 | enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; | ||
| 146 | } else { | 258 | } else { |
| 147 | AttachShader(Stage(stage), program.handle, code); | 259 | AttachShader(Stage(stage), program.handle, code); |
| 148 | } | 260 | } |
| @@ -150,49 +262,6 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c | |||
| 150 | if (!device.UseAssemblyShaders()) { | 262 | if (!device.UseAssemblyShaders()) { |
| 151 | LinkProgram(program.handle); | 263 | LinkProgram(program.handle); |
| 152 | } | 264 | } |
| 153 | u32 num_textures{}; | ||
| 154 | u32 num_images{}; | ||
| 155 | u32 num_storage_buffers{}; | ||
| 156 | for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { | ||
| 157 | const auto& info{stage_infos[stage]}; | ||
| 158 | if (stage < 4) { | ||
| 159 | base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; | ||
| 160 | base_storage_bindings[stage + 1] = base_storage_bindings[stage]; | ||
| 161 | |||
| 162 | base_uniform_bindings[stage + 1] += | ||
| 163 | AccumulateCount(info.constant_buffer_descriptors); | ||
| 164 | base_storage_bindings[stage + 1] += | ||
| 165 | AccumulateCount(info.storage_buffers_descriptors); | ||
| 166 | } | ||
| 167 | enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; | ||
| 168 | std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); | ||
| 169 | |||
| 170 | const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; | ||
| 171 | num_texture_buffers[stage] += num_tex_buffer_bindings; | ||
| 172 | num_textures += num_tex_buffer_bindings; | ||
| 173 | |||
| 174 | const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; | ||
| 175 | num_image_buffers[stage] += num_img_buffers_bindings; | ||
| 176 | num_images += num_img_buffers_bindings; | ||
| 177 | |||
| 178 | num_textures += AccumulateCount(info.texture_descriptors); | ||
| 179 | num_images += AccumulateCount(info.image_descriptors); | ||
| 180 | num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); | ||
| 181 | |||
| 182 | writes_global_memory |= std::ranges::any_of( | ||
| 183 | info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); | ||
| 184 | } | ||
| 185 | ASSERT(num_textures <= MAX_TEXTURES); | ||
| 186 | ASSERT(num_images <= MAX_IMAGES); | ||
| 187 | |||
| 188 | const bool assembly_shaders{assembly_programs[0].handle != 0}; | ||
| 189 | use_storage_buffers = | ||
| 190 | !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); | ||
| 191 | writes_global_memory &= !use_storage_buffers; | ||
| 192 | |||
| 193 | if (assembly_shaders && xfb_state) { | ||
| 194 | GenerateTransformFeedbackState(*xfb_state); | ||
| 195 | } | ||
| 196 | if (shader_notify) { | 265 | if (shader_notify) { |
| 197 | shader_notify->MarkShaderComplete(); | 266 | shader_notify->MarkShaderComplete(); |
| 198 | } | 267 | } |
| @@ -205,7 +274,8 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c | |||
| 205 | } | 274 | } |
| 206 | } | 275 | } |
| 207 | 276 | ||
| 208 | void GraphicsPipeline::Configure(bool is_indexed) { | 277 | template <typename Spec> |
| 278 | void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | ||
| 209 | std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; | 279 | std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; |
| 210 | std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; | 280 | std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; |
| 211 | std::array<GLuint, MAX_TEXTURES> samplers; | 281 | std::array<GLuint, MAX_TEXTURES> samplers; |
| @@ -221,7 +291,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { | |||
| 221 | 291 | ||
| 222 | const auto& regs{maxwell3d.regs}; | 292 | const auto& regs{maxwell3d.regs}; |
| 223 | const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; | 293 | const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; |
| 224 | const auto config_stage{[&](size_t stage) { | 294 | const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { |
| 225 | const Shader::Info& info{stage_infos[stage]}; | 295 | const Shader::Info& info{stage_infos[stage]}; |
| 226 | buffer_cache.UnbindGraphicsStorageBuffers(stage); | 296 | buffer_cache.UnbindGraphicsStorageBuffers(stage); |
| 227 | if constexpr (Spec::has_storage_buffers) { | 297 | if constexpr (Spec::has_storage_buffers) { |
| @@ -311,7 +381,7 @@ void GraphicsPipeline::Configure(bool is_indexed) { | |||
| 311 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | 381 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); |
| 312 | 382 | ||
| 313 | ImageId* texture_buffer_index{image_view_ids.data()}; | 383 | ImageId* texture_buffer_index{image_view_ids.data()}; |
| 314 | const auto bind_stage_info{[&](size_t stage) { | 384 | const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { |
| 315 | size_t index{}; | 385 | size_t index{}; |
| 316 | const auto add_buffer{[&](const auto& desc) { | 386 | const auto add_buffer{[&](const auto& desc) { |
| 317 | constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; | 387 | constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; |
| @@ -430,6 +500,11 @@ void GraphicsPipeline::Configure(bool is_indexed) { | |||
| 430 | } | 500 | } |
| 431 | } | 501 | } |
| 432 | 502 | ||
| 503 | void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { | ||
| 504 | glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, | ||
| 505 | xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); | ||
| 506 | } | ||
| 507 | |||
| 433 | void GraphicsPipeline::GenerateTransformFeedbackState( | 508 | void GraphicsPipeline::GenerateTransformFeedbackState( |
| 434 | const VideoCommon::TransformFeedbackState& xfb_state) { | 509 | const VideoCommon::TransformFeedbackState& xfb_state) { |
| 435 | // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal | 510 | // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal |
| @@ -475,9 +550,4 @@ void GraphicsPipeline::GenerateTransformFeedbackState( | |||
| 475 | num_xfb_strides = static_cast<GLsizei>(current_stream - xfb_streams.data()); | 550 | num_xfb_strides = static_cast<GLsizei>(current_stream - xfb_streams.data()); |
| 476 | } | 551 | } |
| 477 | 552 | ||
| 478 | void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { | ||
| 479 | glTransformFeedbackStreamAttribsNV(num_xfb_attribs, xfb_attribs.data(), num_xfb_strides, | ||
| 480 | xfb_streams.data(), GL_INTERLEAVED_ATTRIBS); | ||
| 481 | } | ||
| 482 | |||
| 483 | } // namespace OpenGL | 553 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 58deafd3c..a3546daa8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h | |||
| @@ -75,7 +75,9 @@ public: | |||
| 75 | const std::array<const Shader::Info*, 5>& infos, | 75 | const std::array<const Shader::Info*, 5>& infos, |
| 76 | const VideoCommon::TransformFeedbackState* xfb_state); | 76 | const VideoCommon::TransformFeedbackState* xfb_state); |
| 77 | 77 | ||
| 78 | void Configure(bool is_indexed); | 78 | void Configure(bool is_indexed) { |
| 79 | configure_func(this, is_indexed); | ||
| 80 | } | ||
| 79 | 81 | ||
| 80 | void ConfigureTransformFeedback() const { | 82 | void ConfigureTransformFeedback() const { |
| 81 | if (num_xfb_attribs != 0) { | 83 | if (num_xfb_attribs != 0) { |
| @@ -91,11 +93,21 @@ public: | |||
| 91 | return is_built.load(std::memory_order::relaxed); | 93 | return is_built.load(std::memory_order::relaxed); |
| 92 | } | 94 | } |
| 93 | 95 | ||
| 96 | template <typename Spec> | ||
| 97 | static auto MakeConfigureSpecFunc() { | ||
| 98 | return [](GraphicsPipeline* pipeline, bool is_indexed) { | ||
| 99 | pipeline->ConfigureImpl<Spec>(is_indexed); | ||
| 100 | }; | ||
| 101 | } | ||
| 102 | |||
| 94 | private: | 103 | private: |
| 95 | void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); | 104 | template <typename Spec> |
| 105 | void ConfigureImpl(bool is_indexed); | ||
| 96 | 106 | ||
| 97 | void ConfigureTransformFeedbackImpl() const; | 107 | void ConfigureTransformFeedbackImpl() const; |
| 98 | 108 | ||
| 109 | void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); | ||
| 110 | |||
| 99 | TextureCache& texture_cache; | 111 | TextureCache& texture_cache; |
| 100 | BufferCache& buffer_cache; | 112 | BufferCache& buffer_cache; |
| 101 | Tegra::MemoryManager& gpu_memory; | 113 | Tegra::MemoryManager& gpu_memory; |
| @@ -103,6 +115,8 @@ private: | |||
| 103 | ProgramManager& program_manager; | 115 | ProgramManager& program_manager; |
| 104 | StateTracker& state_tracker; | 116 | StateTracker& state_tracker; |
| 105 | 117 | ||
| 118 | void (*configure_func)(GraphicsPipeline*, bool){}; | ||
| 119 | |||
| 106 | OGLProgram program; | 120 | OGLProgram program; |
| 107 | std::array<OGLAssemblyProgram, 5> assembly_programs; | 121 | std::array<OGLAssemblyProgram, 5> assembly_programs; |
| 108 | u32 enabled_stages_mask{}; | 122 | u32 enabled_stages_mask{}; |