Diffstat (limited to 'src')
31 files changed, 742 insertions, 744 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6f3f2aa9f..3b20c7d34 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -22,6 +22,7 @@ add_library(video_core STATIC
     engines/maxwell_dma.h
     engines/shader_bytecode.h
     engines/shader_header.h
+    engines/shader_type.h
     gpu.cpp
     gpu.h
     gpu_asynch.cpp
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index ac27b6cbe..44b8b8d22 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -8,19 +8,11 @@
 #include "common/bit_field.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
+#include "video_core/engines/shader_type.h"
 #include "video_core/textures/texture.h"
 
 namespace Tegra::Engines {
 
-enum class ShaderType : u32 {
-    Vertex = 0,
-    TesselationControl = 1,
-    TesselationEval = 2,
-    Geometry = 3,
-    Fragment = 4,
-    Compute = 5,
-};
-
 struct SamplerDescriptor {
     union {
         BitField<0, 20, Tegra::Shader::TextureType> texture_type;
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 3a39aeabe..110406f2f 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -8,6 +8,7 @@
 #include "core/core.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_base.h"
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 5259d92bd..4ef3e0613 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -12,6 +12,7 @@
 #include "common/common_types.h"
 #include "video_core/engines/const_buffer_engine_interface.h"
 #include "video_core/engines/engine_upload.h"
+#include "video_core/engines/shader_type.h"
 #include "video_core/gpu.h"
 #include "video_core/textures/texture.h"
 
@@ -140,7 +141,7 @@ public:
 
         INSERT_PADDING_WORDS(0x3);
 
-        BitField<0, 16, u32> shared_alloc;
+        BitField<0, 18, u32> shared_alloc;
 
         BitField<16, 16, u32> block_dim_x;
         union {
@@ -178,7 +179,12 @@ public:
             BitField<24, 5, u32> gpr_alloc;
         };
 
-        INSERT_PADDING_WORDS(0x11);
+        union {
+            BitField<0, 20, u32> local_crs_alloc;
+            BitField<24, 5, u32> sass_version;
+        };
+
+        INSERT_PADDING_WORDS(0x10);
     } launch_description{};
 
     struct {
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index a44c09003..15a7a9d6a 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -9,6 +9,7 @@
 #include "core/core_timing.h"
 #include "video_core/debug_utils/debug_utils.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/textures/texture.h"
@@ -368,24 +369,24 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
         StartCBData(method);
         break;
     }
-    case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): {
-        ProcessCBBind(Regs::ShaderStage::Vertex);
+    case MAXWELL3D_REG_INDEX(cb_bind[0]): {
+        ProcessCBBind(0);
         break;
     }
-    case MAXWELL3D_REG_INDEX(cb_bind[1].raw_config): {
-        ProcessCBBind(Regs::ShaderStage::TesselationControl);
+    case MAXWELL3D_REG_INDEX(cb_bind[1]): {
+        ProcessCBBind(1);
         break;
     }
-    case MAXWELL3D_REG_INDEX(cb_bind[2].raw_config): {
-        ProcessCBBind(Regs::ShaderStage::TesselationEval);
+    case MAXWELL3D_REG_INDEX(cb_bind[2]): {
+        ProcessCBBind(2);
         break;
     }
-    case MAXWELL3D_REG_INDEX(cb_bind[3].raw_config): {
-        ProcessCBBind(Regs::ShaderStage::Geometry);
+    case MAXWELL3D_REG_INDEX(cb_bind[3]): {
+        ProcessCBBind(3);
         break;
     }
-    case MAXWELL3D_REG_INDEX(cb_bind[4].raw_config): {
-        ProcessCBBind(Regs::ShaderStage::Fragment);
+    case MAXWELL3D_REG_INDEX(cb_bind[4]): {
+        ProcessCBBind(4);
         break;
     }
     case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): {
@@ -687,10 +688,10 @@ void Maxwell3D::DrawArrays() {
     }
 }
 
-void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
+void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
     // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
-    auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
-    auto& bind_data = regs.cb_bind[static_cast<std::size_t>(stage)];
+    auto& shader = state.shader_stages[stage_index];
+    auto& bind_data = regs.cb_bind[stage_index];
 
     ASSERT(bind_data.index < Regs::MaxConstBuffers);
     auto& buffer = shader.const_buffers[bind_data.index];
@@ -757,9 +758,9 @@ Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_ha
     return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
 }
 
-Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
-                                                    std::size_t offset) const {
-    const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
+Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const {
+    const auto stage_index = static_cast<std::size_t>(stage);
+    const auto& shader = state.shader_stages[stage_index];
     const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
     ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
 
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 37390eb87..4cb7339b5 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -18,6 +18,7 @@
 #include "video_core/engines/const_buffer_engine_interface.h"
 #include "video_core/engines/const_buffer_info.h"
 #include "video_core/engines/engine_upload.h"
+#include "video_core/engines/shader_type.h"
 #include "video_core/gpu.h"
 #include "video_core/macro_interpreter.h"
 #include "video_core/textures/texture.h"
@@ -62,7 +63,6 @@ public:
     static constexpr std::size_t NumVertexArrays = 32;
    static constexpr std::size_t NumVertexAttributes = 32;
     static constexpr std::size_t NumVaryings = 31;
-    static constexpr std::size_t NumTextureSamplers = 32;
     static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number
     static constexpr std::size_t NumClipDistances = 8;
     static constexpr std::size_t MaxShaderProgram = 6;
@@ -130,14 +130,6 @@ public:
             Fragment = 5,
         };
 
-        enum class ShaderStage : u32 {
-            Vertex = 0,
-            TesselationControl = 1,
-            TesselationEval = 2,
-            Geometry = 3,
-            Fragment = 4,
-        };
-
         struct VertexAttribute {
             enum class Size : u32 {
                 Invalid = 0x0,
@@ -1254,7 +1246,7 @@ public:
     Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
 
     /// Returns the texture information for a specific texture in a specific shader stage.
-    Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
+    Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const;
 
     u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
 
@@ -1376,7 +1368,7 @@ private:
     void FinishCBData();
 
     /// Handles a write to the CB_BIND register.
-    void ProcessCBBind(Regs::ShaderStage stage);
+    void ProcessCBBind(std::size_t stage_index);
 
     /// Handles a write to the VERTEX_END_GL register, triggering a draw.
     void DrawArrays();
diff --git a/src/video_core/engines/shader_type.h b/src/video_core/engines/shader_type.h
new file mode 100644
index 000000000..49ce5cde5
--- /dev/null
+++ b/src/video_core/engines/shader_type.h
@@ -0,0 +1,21 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Tegra::Engines {
+
+enum class ShaderType : u32 {
+    Vertex = 0,
+    TesselationControl = 1,
+    TesselationEval = 2,
+    Geometry = 3,
+    Fragment = 4,
+    Compute = 5,
+};
+static constexpr std::size_t MaxShaderTypes = 6;
+
+} // namespace Tegra::Engines
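Note: shader_type.h now owns the ShaderType enumeration that used to live in const_buffer_engine_interface.h, adding Compute alongside the five graphics stages plus a MaxShaderTypes count for sizing per-stage tables. A minimal sketch of the intended indexing pattern follows; the table name and payload type are illustrative only, not part of the commit.

    #include <array>
    #include <cstddef>
    #include "video_core/engines/shader_type.h"

    // Hypothetical per-stage table: one slot per ShaderType, sized with MaxShaderTypes.
    std::array<int, Tegra::Engines::MaxShaderTypes> per_stage_data{};

    int LookUp(Tegra::Engines::ShaderType type) {
        // The enum's underlying values (0..5) double as array indices.
        return per_stage_data[static_cast<std::size_t>(type)];
    }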
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index b30d5be74..a95bd4b2c 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -5,7 +5,9 @@
 #include <algorithm>
 #include <array>
 #include <cstddef>
+#include <optional>
 #include <vector>
+
 #include <glad/glad.h>
 
 #include "common/logging/log.h"
@@ -17,6 +19,30 @@ namespace OpenGL {
 
 namespace {
 
+// One uniform block is reserved for emulation purposes
+constexpr u32 ReservedUniformBlocks = 1;
+
+constexpr u32 NumStages = 5;
+
+constexpr std::array LimitUBOs = {GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
+                                  GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS,
+                                  GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS};
+
+constexpr std::array LimitSSBOs = {
+    GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
+    GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
+    GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS};
+
+constexpr std::array LimitSamplers = {
+    GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
+    GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
+    GL_MAX_TEXTURE_IMAGE_UNITS};
+
+constexpr std::array LimitImages = {GL_MAX_VERTEX_IMAGE_UNIFORMS,
+                                    GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
+                                    GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS,
+                                    GL_MAX_GEOMETRY_IMAGE_UNIFORMS, GL_MAX_FRAGMENT_IMAGE_UNIFORMS};
+
 template <typename T>
 T GetInteger(GLenum pname) {
     GLint temporary;
@@ -48,13 +74,70 @@ bool HasExtension(const std::vector<std::string_view>& images, std::string_view
     return std::find(images.begin(), images.end(), extension) != images.end();
 }
 
+u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
+    ASSERT(num >= amount);
+    if (limit) {
+        amount = std::min(amount, GetInteger<u32>(*limit));
+    }
+    num -= amount;
+    return std::exchange(base, base + amount);
+}
+
+std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
+    std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
+
+    static std::array<std::size_t, 5> stage_swizzle = {0, 1, 2, 3, 4};
+    const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS);
+    const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
+    const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS);
+
+    u32 num_ubos = total_ubos - ReservedUniformBlocks;
+    u32 num_ssbos = total_ssbos;
+    u32 num_samplers = total_samplers;
+
+    u32 base_ubo = ReservedUniformBlocks;
+    u32 base_ssbo = 0;
+    u32 base_samplers = 0;
+
+    for (std::size_t i = 0; i < NumStages; ++i) {
+        const std::size_t stage = stage_swizzle[i];
+        bindings[stage] = {
+            Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]),
+            Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]),
+            Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])};
+    }
+
+    u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
+    u32 base_images = 0;
+
+    // Reserve more image bindings on fragment and vertex stages.
+    bindings[4].image =
+        Extract(base_images, num_images, num_images / NumStages + 2, LimitImages[4]);
+    bindings[0].image =
+        Extract(base_images, num_images, num_images / NumStages + 1, LimitImages[0]);
+
+    // Reserve the other image bindings.
+    const u32 total_extracted_images = num_images / (NumStages - 2);
+    for (std::size_t i = 2; i < NumStages; ++i) {
+        const std::size_t stage = stage_swizzle[i];
+        bindings[stage].image =
+            Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
+    }
+
+    // Compute doesn't care about any of this.
+    bindings[5] = {0, 0, 0, 0};
+
+    return bindings;
+}
+
 } // Anonymous namespace
 
-Device::Device() {
+Device::Device() : base_bindings{BuildBaseBindings()} {
     const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
     const std::vector extensions = GetExtensions();
 
     const bool is_nvidia = vendor == "NVIDIA Corporation";
+    const bool is_intel = vendor == "Intel";
 
     uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
     shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
@@ -68,6 +151,7 @@ Device::Device() {
     has_variable_aoffi = TestVariableAoffi();
     has_component_indexing_bug = TestComponentIndexingBug();
     has_precise_bug = TestPreciseBug();
+    has_broken_compute = is_intel;
     has_fast_buffer_sub_data = is_nvidia;
 
     LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
@@ -85,6 +169,7 @@ Device::Device(std::nullptr_t) {
     has_image_load_formatted = true;
     has_variable_aoffi = true;
     has_component_indexing_bug = false;
+    has_broken_compute = false;
     has_precise_bug = false;
 }
 
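Note: BuildBaseBindings() partitions each global OpenGL binding pool (uniform buffers, shader storage buffers, samplers, images) into contiguous per-stage ranges once at device initialization, clamping every slice to the stage-specific GL limit, keeping uniform block 0 free for the emulation buffer, and giving the fragment and vertex stages a slightly larger share of image bindings. The core arithmetic reduces to the standalone sketch below, which uses made-up pool sizes instead of glGetIntegerv queries.

    #include <algorithm>
    #include <array>
    #include <utility>

    constexpr unsigned NumStages = 5;

    // Carve a pool of `total` bindings into per-stage slices: each stage receives an equal
    // share capped by its own limit, and its base index is wherever the previous slice ended.
    std::array<unsigned, NumStages> PartitionPool(unsigned total,
                                                  const std::array<unsigned, NumStages>& limits) {
        std::array<unsigned, NumStages> bases{};
        unsigned base = 0;
        for (unsigned stage = 0; stage < NumStages; ++stage) {
            const unsigned amount = std::min(total / NumStages, limits[stage]);
            bases[stage] = std::exchange(base, base + amount); // same idiom as Extract()
        }
        return bases;
    }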
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 6c86fe207..5433815b9 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -6,14 +6,32 @@
 
 #include <cstddef>
 #include "common/common_types.h"
+#include "video_core/engines/shader_type.h"
 
 namespace OpenGL {
 
-class Device {
+static constexpr u32 EmulationUniformBlockBinding = 0;
+
+class Device final {
 public:
+    struct BaseBindings final {
+        u32 uniform_buffer{};
+        u32 shader_storage_buffer{};
+        u32 sampler{};
+        u32 image{};
+    };
+
     explicit Device();
     explicit Device(std::nullptr_t);
 
+    const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
+        return base_bindings[stage_index];
+    }
+
+    const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept {
+        return GetBaseBindings(static_cast<std::size_t>(shader_type));
+    }
+
     std::size_t GetUniformBufferAlignment() const {
         return uniform_buffer_alignment;
     }
@@ -58,6 +76,10 @@ public:
         return has_precise_bug;
     }
 
+    bool HasBrokenCompute() const {
+        return has_broken_compute;
+    }
+
     bool HasFastBufferSubData() const {
         return has_fast_buffer_sub_data;
     }
@@ -67,6 +89,7 @@ private:
     static bool TestComponentIndexingBug();
     static bool TestPreciseBug();
 
+    std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings;
     std::size_t uniform_buffer_alignment{};
     std::size_t shader_storage_alignment{};
     u32 max_vertex_attributes{};
@@ -78,6 +101,7 @@ private:
     bool has_variable_aoffi{};
     bool has_component_indexing_bug{};
     bool has_precise_bug{};
+    bool has_broken_compute{};
     bool has_fast_buffer_sub_data{};
 };
 
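Note: exposing Device::GetBaseBindings() turns resource bind points into a per-stage property of the device, so each stage's programs can be compiled against fixed bases instead of the per-draw BaseBindings the rasterizer used to thread through ProgramVariant. A sketch of the consumption pattern, mirroring the rasterizer changes below; Entry and the bind callback are placeholders, not yuzu symbols.

    #include <cstddef>
    #include <vector>
    #include "video_core/renderer_opengl/gl_device.h"

    // Each stage starts at its own base binding and assigns consecutive slots to its entries.
    template <typename Entry, typename BindFn>
    void AssignUniformBindings(const OpenGL::Device& device, std::size_t stage_index,
                               const std::vector<Entry>& entries, BindFn&& bind) {
        u32 binding = device.GetBaseBindings(stage_index).uniform_buffer;
        for (const auto& entry : entries) {
            bind(binding++, entry); // stands in for bind_ubo_pushbuffer.Push(binding, ...)
        }
    }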
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 05f8e511b..f97ec06f0 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -22,6 +22,7 @@
 #include "core/settings.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
@@ -49,8 +50,25 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
 MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
 MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
 
-static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
-                                      const GLShader::ConstBufferEntry& entry) {
+namespace {
+
+template <typename Engine, typename Entry>
+Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
+                                               Tegra::Engines::ShaderType shader_type) {
+    if (entry.IsBindless()) {
+        const Tegra::Texture::TextureHandle tex_handle =
+            engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
+        return engine.GetTextureInfo(tex_handle);
+    }
+    if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
+        return engine.GetStageTexture(shader_type, entry.GetOffset());
+    } else {
+        return engine.GetTexture(entry.GetOffset());
+    }
+}
+
+std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
+                               const GLShader::ConstBufferEntry& entry) {
     if (!entry.IsIndirect()) {
         return entry.GetSize();
     }
@@ -64,6 +82,8 @@ static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buf
     return buffer.size;
 }
 
+} // Anonymous namespace
+
 RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
                                    ScreenInfo& info)
     : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
@@ -238,12 +258,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
     MICROPROFILE_SCOPE(OpenGL_Shader);
     auto& gpu = system.GPU().Maxwell3D();
 
-    BaseBindings base_bindings;
     std::array<bool, Maxwell::NumClipDistances> clip_distances{};
 
     for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
         const auto& shader_config = gpu.regs.shader_config[index];
-        const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
+        const auto program{static_cast<Maxwell::ShaderProgram>(index)};
 
         // Skip stages that are not enabled
         if (!gpu.regs.IsShaderConfigEnabled(index)) {
@@ -257,24 +276,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
             continue;
         }
 
-        GLShader::MaxwellUniformData ubo{};
-        ubo.SetFromRegs(gpu);
-        const auto [buffer, offset] =
-            buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
-
-        // Bind the emulation info buffer
-        bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo)));
-
         Shader shader{shader_cache.GetStageProgram(program)};
 
         // Stage indices are 0 - 5
-        const auto stage = static_cast<Maxwell::ShaderStage>(index == 0 ? 0 : index - 1);
+        const std::size_t stage = index == 0 ? 0 : index - 1;
         SetupDrawConstBuffers(stage, shader);
         SetupDrawGlobalMemory(stage, shader);
-        const auto texture_buffer_usage{SetupDrawTextures(stage, shader, base_bindings)};
+        SetupDrawTextures(stage, shader);
+        SetupDrawImages(stage, shader);
 
-        const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
-        const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant);
+        const ProgramVariant variant(primitive_mode);
+        const auto program_handle = shader->GetHandle(variant);
 
         switch (program) {
         case Maxwell::ShaderProgram::VertexA:
@@ -303,10 +315,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
         // When VertexA is enabled, we have dual vertex shaders
         if (program == Maxwell::ShaderProgram::VertexA) {
            // VertexB was combined with VertexA, so we skip the VertexB iteration
-            index++;
+            ++index;
         }
-
-        base_bindings = next_bindings;
     }
 
     SyncClipEnabled(clip_distances);
@@ -591,8 +601,16 @@ void RasterizerOpenGL::DrawPrelude() {
     index_buffer_offset = SetupIndexBuffer();
 
     // Prepare packed bindings.
-    bind_ubo_pushbuffer.Setup(0);
-    bind_ssbo_pushbuffer.Setup(0);
+    bind_ubo_pushbuffer.Setup();
+    bind_ssbo_pushbuffer.Setup();
+
+    // Setup emulation uniform buffer.
+    GLShader::MaxwellUniformData ubo;
+    ubo.SetFromRegs(gpu);
+    const auto [buffer, offset] =
+        buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
+    bind_ubo_pushbuffer.Push(EmulationUniformBlockBinding, buffer, offset,
+                             static_cast<GLsizeiptr>(sizeof(ubo)));
 
     // Setup shaders and their used resources.
     texture_cache.GuardSamplers(true);
@@ -725,19 +743,21 @@ bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) {
 }
 
 void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
-    if (!GLAD_GL_ARB_compute_variable_group_size) {
-        LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the "
-                                 "lack of GL_ARB_compute_variable_group_size");
+    if (device.HasBrokenCompute()) {
         return;
     }
 
+    buffer_cache.Acquire();
+
     auto kernel = shader_cache.GetComputeKernel(code_addr);
-    ProgramVariant variant;
-    variant.texture_buffer_usage = SetupComputeTextures(kernel);
+    SetupComputeTextures(kernel);
     SetupComputeImages(kernel);
 
-    const auto [program, next_bindings] = kernel->GetProgramHandle(variant);
-    state.draw.shader_program = program;
+    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+    const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
+                                 launch_desc.block_dim_z, launch_desc.shared_alloc,
+                                 launch_desc.local_pos_alloc);
+    state.draw.shader_program = kernel->GetHandle(variant);
     state.draw.program_pipeline = 0;
 
     const std::size_t buffer_size =
@@ -745,8 +765,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
         (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
     buffer_cache.Map(buffer_size);
 
-    bind_ubo_pushbuffer.Setup(0);
-    bind_ssbo_pushbuffer.Setup(0);
+    bind_ubo_pushbuffer.Setup();
+    bind_ssbo_pushbuffer.Setup();
 
     SetupComputeConstBuffers(kernel);
     SetupComputeGlobalMemory(kernel);
@@ -761,10 +781,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
     state.ApplyShaderProgram();
     state.ApplyProgramPipeline();
 
-    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
-    glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y,
-                                  launch_desc.grid_dim_z, launch_desc.block_dim_x,
-                                  launch_desc.block_dim_y, launch_desc.block_dim_z);
+    glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
 }
 
 void RasterizerOpenGL::FlushAll() {}
@@ -833,7 +850,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
     ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
 
     if (params.pixel_format != pixel_format) {
-        LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different");
+        LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different");
     }
 
     screen_info.display_texture = surface->GetTexture();
@@ -842,20 +859,23 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
     return true;
 }
 
-void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
-                                             const Shader& shader) {
+void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
     MICROPROFILE_SCOPE(OpenGL_UBO);
     const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
-    const auto& shader_stage = stages[static_cast<std::size_t>(stage)];
+    const auto& shader_stage = stages[stage_index];
+
+    u32 binding = device.GetBaseBindings(stage_index).uniform_buffer;
     for (const auto& entry : shader->GetShaderEntries().const_buffers) {
         const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
-        SetupConstBuffer(buffer, entry);
+        SetupConstBuffer(binding++, buffer, entry);
     }
 }
 
 void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
     MICROPROFILE_SCOPE(OpenGL_UBO);
     const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+
+    u32 binding = 0;
     for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
         const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
         const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
@@ -863,15 +883,16 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
         buffer.address = config.Address();
         buffer.size = config.size;
         buffer.enabled = mask[entry.GetIndex()];
-        SetupConstBuffer(buffer, entry);
+        SetupConstBuffer(binding++, buffer, entry);
     }
 }
 
-void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer,
+void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
                                         const GLShader::ConstBufferEntry& entry) {
     if (!buffer.enabled) {
         // Set values to zero to unbind buffers
-        bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
+        bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
+                                 sizeof(float));
         return;
     }
 
@@ -882,19 +903,20 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
     const auto alignment = device.GetUniformBufferAlignment();
     const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
                                                           device.HasFastBufferSubData());
-    bind_ubo_pushbuffer.Push(cbuf, offset, size);
+    bind_ubo_pushbuffer.Push(binding, cbuf, offset, size);
 }
 
-void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
-                                             const Shader& shader) {
+void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
     auto& gpu{system.GPU()};
     auto& memory_manager{gpu.MemoryManager()};
-    const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
+    const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
+
+    u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer;
     for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
         const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
         const auto gpu_addr{memory_manager.Read<u64>(addr)};
         const auto size{memory_manager.Read<u32>(addr + 8)};
-        SetupGlobalMemory(entry, gpu_addr, size);
+        SetupGlobalMemory(binding++, entry, gpu_addr, size);
     }
 }
 
@@ -902,120 +924,82 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
     auto& gpu{system.GPU()};
     auto& memory_manager{gpu.MemoryManager()};
     const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
+
+    u32 binding = 0;
     for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) {
         const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
         const auto gpu_addr{memory_manager.Read<u64>(addr)};
         const auto size{memory_manager.Read<u32>(addr + 8)};
-        SetupGlobalMemory(entry, gpu_addr, size);
+        SetupGlobalMemory(binding++, entry, gpu_addr, size);
     }
 }
 
-void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry,
+void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry,
                                          GPUVAddr gpu_addr, std::size_t size) {
     const auto alignment{device.GetShaderStorageBufferAlignment()};
     const auto [ssbo, buffer_offset] =
         buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten());
-    bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
+    bind_ssbo_pushbuffer.Push(binding, ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
 }
 
-TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage,
-                                                       const Shader& shader,
-                                                       BaseBindings base_bindings) {
+void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) {
     MICROPROFILE_SCOPE(OpenGL_Texture);
-    const auto& gpu = system.GPU();
-    const auto& maxwell3d = gpu.Maxwell3D();
-    const auto& entries = shader->GetShaderEntries().samplers;
-
-    ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures),
-               "Exceeded the number of active textures.");
-
-    TextureBufferUsage texture_buffer_usage{0};
-
-    for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
-        const auto& entry = entries[bindpoint];
-        const auto texture = [&] {
-            if (!entry.IsBindless()) {
-                return maxwell3d.GetStageTexture(stage, entry.GetOffset());
-            }
-            const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
-            const Tegra::Texture::TextureHandle tex_handle =
-                maxwell3d.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
-            return maxwell3d.GetTextureInfo(tex_handle);
-        }();
-
-        if (SetupTexture(base_bindings.sampler + bindpoint, texture, entry)) {
-            texture_buffer_usage.set(bindpoint);
-        }
+    const auto& maxwell3d = system.GPU().Maxwell3D();
+    u32 binding = device.GetBaseBindings(stage_index).sampler;
+    for (const auto& entry : shader->GetShaderEntries().samplers) {
+        const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
+        const auto texture = GetTextureInfo(maxwell3d, entry, shader_type);
+        SetupTexture(binding++, texture, entry);
     }
-
-    return texture_buffer_usage;
 }
 
-TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
+void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
     MICROPROFILE_SCOPE(OpenGL_Texture);
     const auto& compute = system.GPU().KeplerCompute();
-    const auto& entries = kernel->GetShaderEntries().samplers;
-
-    ASSERT_MSG(entries.size() <= std::size(state.textures),
-               "Exceeded the number of active textures.");
-
-    TextureBufferUsage texture_buffer_usage{0};
-
-    for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
-        const auto& entry = entries[bindpoint];
-        const auto texture = [&] {
-            if (!entry.IsBindless()) {
-                return compute.GetTexture(entry.GetOffset());
-            }
-            const Tegra::Texture::TextureHandle tex_handle = compute.AccessConstBuffer32(
-                Tegra::Engines::ShaderType::Compute, entry.GetBuffer(), entry.GetOffset());
-            return compute.GetTextureInfo(tex_handle);
-        }();
-
-        if (SetupTexture(bindpoint, texture, entry)) {
-            texture_buffer_usage.set(bindpoint);
-        }
+    u32 binding = 0;
+    for (const auto& entry : kernel->GetShaderEntries().samplers) {
+        const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
+        SetupTexture(binding++, texture, entry);
     }
-
-    return texture_buffer_usage;
 }
 
-bool RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
+void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
                                     const GLShader::SamplerEntry& entry) {
-    state.samplers[binding] = sampler_cache.GetSampler(texture.tsc);
-
     const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
     if (!view) {
         // Can occur when texture addr is null or its memory is unmapped/invalid
+        state.samplers[binding] = 0;
         state.textures[binding] = 0;
-        return false;
+        return;
     }
     state.textures[binding] = view->GetTexture();
 
     if (view->GetSurfaceParams().IsBuffer()) {
-        return true;
+        return;
     }
+    state.samplers[binding] = sampler_cache.GetSampler(texture.tsc);
 
     // Apply swizzle to textures that are not buffers.
     view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
                        texture.tic.w_source);
-    return false;
+}
+
+void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
+    const auto& maxwell3d = system.GPU().Maxwell3D();
+    u32 binding = device.GetBaseBindings(stage_index).image;
+    for (const auto& entry : shader->GetShaderEntries().images) {
+        const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
+        const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
+        SetupImage(binding++, tic, entry);
+    }
 }
 
 void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
     const auto& compute = system.GPU().KeplerCompute();
-    const auto& entries = shader->GetShaderEntries().images;
-    for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
-        const auto& entry = entries[bindpoint];
-        const auto tic = [&] {
-            if (!entry.IsBindless()) {
-                return compute.GetTexture(entry.GetOffset()).tic;
-            }
-            const Tegra::Texture::TextureHandle tex_handle = compute.AccessConstBuffer32(
-                Tegra::Engines::ShaderType::Compute, entry.GetBuffer(), entry.GetOffset());
-            return compute.GetTextureInfo(tex_handle).tic;
-        }();
-        SetupImage(bindpoint, tic, entry);
+    u32 binding = 0;
+    for (const auto& entry : shader->GetShaderEntries().images) {
+        const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic;
+        SetupImage(binding++, tic, entry);
     }
 }
 
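Note: the new GetTextureInfo() helper above folds the duplicated bindless/bound texture lookups into a single template and uses if constexpr to pick the Maxwell3D or KeplerCompute path at compile time. The same dispatch technique in isolation, with stand-in types rather than the engine classes:

    #include <type_traits>

    struct GraphicsEngine {
        int GetStageTexture(int stage, int offset) const { return stage * 100 + offset; }
    };
    struct ComputeEngine {
        int GetTexture(int offset) const { return offset; }
    };

    // One template serves both engines; the branch not taken is discarded at compile time,
    // so each engine only needs the member function its own branch calls.
    template <typename Engine>
    int QueryTexture(const Engine& engine, int stage, int offset) {
        if constexpr (std::is_same_v<Engine, GraphicsEngine>) {
            return engine.GetStageTexture(stage, offset);
        } else {
            return engine.GetTexture(offset);
        }
    }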
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index bd6fe5c3a..0e47d71df 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -83,42 +83,41 @@ private:
                                bool using_depth_fb, bool using_stencil_fb);
 
     /// Configures the current constbuffers to use for the draw command.
-    void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
-                               const Shader& shader);
+    void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);
 
     /// Configures the current constbuffers to use for the kernel invocation.
     void SetupComputeConstBuffers(const Shader& kernel);
 
     /// Configures a constant buffer.
-    void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer,
+    void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
                           const GLShader::ConstBufferEntry& entry);
 
     /// Configures the current global memory entries to use for the draw command.
-    void SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
-                               const Shader& shader);
+    void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
 
     /// Configures the current global memory entries to use for the kernel invocation.
     void SetupComputeGlobalMemory(const Shader& kernel);
 
     /// Configures a constant buffer.
-    void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
+    void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
                            std::size_t size);
 
     /// Syncs all the state, shaders, render targets and textures setting before a draw call.
     void DrawPrelude();
 
-    /// Configures the current textures to use for the draw command. Returns shaders texture buffer
-    /// usage.
-    TextureBufferUsage SetupDrawTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
-                                         const Shader& shader, BaseBindings base_bindings);
+    /// Configures the current textures to use for the draw command.
+    void SetupDrawTextures(std::size_t stage_index, const Shader& shader);
 
-    /// Configures the textures used in a compute shader. Returns texture buffer usage.
-    TextureBufferUsage SetupComputeTextures(const Shader& kernel);
+    /// Configures the textures used in a compute shader.
+    void SetupComputeTextures(const Shader& kernel);
 
-    /// Configures a texture. Returns true when the texture is a texture buffer.
-    bool SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
+    /// Configures a texture.
+    void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
                       const GLShader::SamplerEntry& entry);
 
+    /// Configures images in a graphics shader.
+    void SetupDrawImages(std::size_t stage_index, const Shader& shader);
+
     /// Configures images in a compute shader.
     void SetupComputeImages(const Shader& shader);
 
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 04a239a39..370bdf052 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -8,12 +8,15 @@
 #include <thread>
 #include <unordered_set>
 #include <boost/functional/hash.hpp>
+#include "common/alignment.h"
 #include "common/assert.h"
+#include "common/logging/log.h"
 #include "common/scope_exit.h"
 #include "core/core.h"
 #include "core/frontend/emu_window.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
| @@ -82,28 +85,26 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | |||
| 82 | /// Gets the shader program code from memory for the specified address | 85 | /// Gets the shader program code from memory for the specified address |
| 83 | ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, | 86 | ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, |
| 84 | const u8* host_ptr) { | 87 | const u8* host_ptr) { |
| 85 | ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); | 88 | ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); |
| 86 | ASSERT_OR_EXECUTE(host_ptr != nullptr, { | 89 | ASSERT_OR_EXECUTE(host_ptr != nullptr, { |
| 87 | std::fill(program_code.begin(), program_code.end(), 0); | 90 | std::fill(code.begin(), code.end(), 0); |
| 88 | return program_code; | 91 | return code; |
| 89 | }); | 92 | }); |
| 90 | memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), | 93 | memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64)); |
| 91 | program_code.size() * sizeof(u64)); | 94 | code.resize(CalculateProgramSize(code)); |
| 92 | program_code.resize(CalculateProgramSize(program_code)); | 95 | return code; |
| 93 | return program_code; | ||
| 94 | } | 96 | } |
| 95 | 97 | ||
| 96 | /// Gets the shader type from a Maxwell program type | 98 | /// Gets the shader type from a Maxwell program type |
| 97 | constexpr GLenum GetShaderType(ProgramType program_type) { | 99 | constexpr GLenum GetGLShaderType(ShaderType shader_type) { |
| 98 | switch (program_type) { | 100 | switch (shader_type) { |
| 99 | case ProgramType::VertexA: | 101 | case ShaderType::Vertex: |
| 100 | case ProgramType::VertexB: | ||
| 101 | return GL_VERTEX_SHADER; | 102 | return GL_VERTEX_SHADER; |
| 102 | case ProgramType::Geometry: | 103 | case ShaderType::Geometry: |
| 103 | return GL_GEOMETRY_SHADER; | 104 | return GL_GEOMETRY_SHADER; |
| 104 | case ProgramType::Fragment: | 105 | case ShaderType::Fragment: |
| 105 | return GL_FRAGMENT_SHADER; | 106 | return GL_FRAGMENT_SHADER; |
| 106 | case ProgramType::Compute: | 107 | case ShaderType::Compute: |
| 107 | return GL_COMPUTE_SHADER; | 108 | return GL_COMPUTE_SHADER; |
| 108 | default: | 109 | default: |
| 109 | return GL_NONE; | 110 | return GL_NONE; |
| @@ -133,30 +134,11 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen | |||
| 133 | } | 134 | } |
| 134 | } | 135 | } |
| 135 | 136 | ||
| 136 | ProgramType GetProgramType(Maxwell::ShaderProgram program) { | ||
| 137 | switch (program) { | ||
| 138 | case Maxwell::ShaderProgram::VertexA: | ||
| 139 | return ProgramType::VertexA; | ||
| 140 | case Maxwell::ShaderProgram::VertexB: | ||
| 141 | return ProgramType::VertexB; | ||
| 142 | case Maxwell::ShaderProgram::TesselationControl: | ||
| 143 | return ProgramType::TessellationControl; | ||
| 144 | case Maxwell::ShaderProgram::TesselationEval: | ||
| 145 | return ProgramType::TessellationEval; | ||
| 146 | case Maxwell::ShaderProgram::Geometry: | ||
| 147 | return ProgramType::Geometry; | ||
| 148 | case Maxwell::ShaderProgram::Fragment: | ||
| 149 | return ProgramType::Fragment; | ||
| 150 | } | ||
| 151 | UNREACHABLE(); | ||
| 152 | return {}; | ||
| 153 | } | ||
| 154 | |||
| 155 | /// Hashes one (or two) program streams | 137 | /// Hashes one (or two) program streams |
| 156 | u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, | 138 | u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code, |
| 157 | const ProgramCode& code_b) { | 139 | const ProgramCode& code_b) { |
| 158 | u64 unique_identifier = boost::hash_value(code); | 140 | u64 unique_identifier = boost::hash_value(code); |
| 159 | if (program_type == ProgramType::VertexA) { | 141 | if (is_a) { |
| 160 | // VertexA programs include two programs | 142 | // VertexA programs include two programs |
| 161 | boost::hash_combine(unique_identifier, boost::hash_value(code_b)); | 143 | boost::hash_combine(unique_identifier, boost::hash_value(code_b)); |
| 162 | } | 144 | } |
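GetUniqueIdentifier now receives an explicit is_a flag instead of inspecting a VertexA program type. A standalone sketch of the same hash-combining idea follows; the real code uses boost::hash_value and boost::hash_combine, so the local HashCode helper and its mixing constant are only assumptions made to keep the sketch free of Boost:

// Illustrative sketch only, not part of the diff.
#include <cstdint>
#include <functional>
#include <vector>

// Stand-in for VideoCommon::Shader::ProgramCode (a vector of 64-bit words).
using ProgramCode = std::vector<std::uint64_t>;

// Boost-style combine over the code words; the constant is the usual
// golden-ratio mixer, chosen here as an assumption.
static std::uint64_t HashCode(const ProgramCode& code) {
    std::uint64_t seed = 0;
    for (const std::uint64_t word : code) {
        seed ^= std::hash<std::uint64_t>{}(word) + 0x9e3779b97f4a7c15ULL + (seed << 6) + (seed >> 2);
    }
    return seed;
}

// Mirrors the new shape of GetUniqueIdentifier: a VertexA+VertexB pair hashes
// both streams, every other stage hashes only its own code.
std::uint64_t UniqueIdentifier(bool is_a, const ProgramCode& code, const ProgramCode& code_b) {
    std::uint64_t id = HashCode(code);
    if (is_a) {
        id ^= HashCode(code_b) + 0x9e3779b97f4a7c15ULL + (id << 6) + (id >> 2);
    }
    return id;
}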
| @@ -164,79 +146,74 @@ u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, | |||
| 164 | } | 146 | } |
| 165 | 147 | ||
| 166 | /// Creates an unspecialized program from code streams | 148 | /// Creates an unspecialized program from code streams |
| 167 | std::string GenerateGLSL(const Device& device, ProgramType program_type, const ShaderIR& ir, | 149 | std::string GenerateGLSL(const Device& device, ShaderType shader_type, const ShaderIR& ir, |
| 168 | const std::optional<ShaderIR>& ir_b) { | 150 | const std::optional<ShaderIR>& ir_b) { |
| 169 | switch (program_type) { | 151 | switch (shader_type) { |
| 170 | case ProgramType::VertexA: | 152 | case ShaderType::Vertex: |
| 171 | case ProgramType::VertexB: | ||
| 172 | return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr); | 153 | return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr); |
| 173 | case ProgramType::Geometry: | 154 | case ShaderType::Geometry: |
| 174 | return GLShader::GenerateGeometryShader(device, ir); | 155 | return GLShader::GenerateGeometryShader(device, ir); |
| 175 | case ProgramType::Fragment: | 156 | case ShaderType::Fragment: |
| 176 | return GLShader::GenerateFragmentShader(device, ir); | 157 | return GLShader::GenerateFragmentShader(device, ir); |
| 177 | case ProgramType::Compute: | 158 | case ShaderType::Compute: |
| 178 | return GLShader::GenerateComputeShader(device, ir); | 159 | return GLShader::GenerateComputeShader(device, ir); |
| 179 | default: | 160 | default: |
| 180 | UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); | 161 | UNIMPLEMENTED_MSG("Unimplemented shader_type={}", static_cast<u32>(shader_type)); |
| 181 | return {}; | 162 | return {}; |
| 182 | } | 163 | } |
| 183 | } | 164 | } |
| 184 | 165 | ||
| 185 | constexpr const char* GetProgramTypeName(ProgramType program_type) { | 166 | constexpr const char* GetShaderTypeName(ShaderType shader_type) { |
| 186 | switch (program_type) { | 167 | switch (shader_type) { |
| 187 | case ProgramType::VertexA: | 168 | case ShaderType::Vertex: |
| 188 | case ProgramType::VertexB: | ||
| 189 | return "VS"; | 169 | return "VS"; |
| 190 | case ProgramType::TessellationControl: | 170 | case ShaderType::TesselationControl: |
| 191 | return "TCS"; | 171 | return "HS"; |
| 192 | case ProgramType::TessellationEval: | 172 | case ShaderType::TesselationEval: |
| 193 | return "TES"; | 173 | return "DS"; |
| 194 | case ProgramType::Geometry: | 174 | case ShaderType::Geometry: |
| 195 | return "GS"; | 175 | return "GS"; |
| 196 | case ProgramType::Fragment: | 176 | case ShaderType::Fragment: |
| 197 | return "FS"; | 177 | return "FS"; |
| 198 | case ProgramType::Compute: | 178 | case ShaderType::Compute: |
| 199 | return "CS"; | 179 | return "CS"; |
| 200 | } | 180 | } |
| 201 | return "UNK"; | 181 | return "UNK"; |
| 202 | } | 182 | } |
| 203 | 183 | ||
| 204 | Tegra::Engines::ShaderType GetEnginesShaderType(ProgramType program_type) { | 184 | constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { |
| 205 | switch (program_type) { | 185 | switch (program_type) { |
| 206 | case ProgramType::VertexA: | 186 | case Maxwell::ShaderProgram::VertexA: |
| 207 | case ProgramType::VertexB: | 187 | case Maxwell::ShaderProgram::VertexB: |
| 208 | return Tegra::Engines::ShaderType::Vertex; | 188 | return ShaderType::Vertex; |
| 209 | case ProgramType::TessellationControl: | 189 | case Maxwell::ShaderProgram::TesselationControl: |
| 210 | return Tegra::Engines::ShaderType::TesselationControl; | 190 | return ShaderType::TesselationControl; |
| 211 | case ProgramType::TessellationEval: | 191 | case Maxwell::ShaderProgram::TesselationEval: |
| 212 | return Tegra::Engines::ShaderType::TesselationEval; | 192 | return ShaderType::TesselationEval; |
| 213 | case ProgramType::Geometry: | 193 | case Maxwell::ShaderProgram::Geometry: |
| 214 | return Tegra::Engines::ShaderType::Geometry; | 194 | return ShaderType::Geometry; |
| 215 | case ProgramType::Fragment: | 195 | case Maxwell::ShaderProgram::Fragment: |
| 216 | return Tegra::Engines::ShaderType::Fragment; | 196 | return ShaderType::Fragment; |
| 217 | case ProgramType::Compute: | 197 | } |
| 218 | return Tegra::Engines::ShaderType::Compute; | ||
| 219 | } | ||
| 220 | UNREACHABLE(); | ||
| 221 | return {}; | 198 | return {}; |
| 222 | } | 199 | } |
| 223 | 200 | ||
| 224 | std::string GetShaderId(u64 unique_identifier, ProgramType program_type) { | 201 | std::string GetShaderId(u64 unique_identifier, ShaderType shader_type) { |
| 225 | return fmt::format("{}{:016X}", GetProgramTypeName(program_type), unique_identifier); | 202 | return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); |
| 226 | } | 203 | } |
| 227 | 204 | ||
| 228 | Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface( | 205 | Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(Core::System& system, |
| 229 | Core::System& system, ProgramType program_type) { | 206 | ShaderType shader_type) { |
| 230 | if (program_type == ProgramType::Compute) { | 207 | if (shader_type == ShaderType::Compute) { |
| 231 | return system.GPU().KeplerCompute(); | 208 | return system.GPU().KeplerCompute(); |
| 232 | } else { | 209 | } else { |
| 233 | return system.GPU().Maxwell3D(); | 210 | return system.GPU().Maxwell3D(); |
| 234 | } | 211 | } |
| 235 | } | 212 | } |
| 236 | 213 | ||
| 237 | std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ProgramType program_type) { | 214 | std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType shader_type) { |
| 238 | return std::make_unique<ConstBufferLocker>(GetEnginesShaderType(program_type), | 215 | return std::make_unique<ConstBufferLocker>(shader_type, |
| 239 | GetConstBufferEngineInterface(system, program_type)); | 216 | GetConstBufferEngineInterface(system, shader_type)); |
| 240 | } | 217 | } |
| 241 | 218 | ||
| 242 | void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { | 219 | void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { |
| @@ -253,33 +230,26 @@ void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { | |||
| 253 | } | 230 | } |
| 254 | } | 231 | } |
| 255 | 232 | ||
| 256 | CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type, | 233 | CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderType shader_type, |
| 257 | const ProgramCode& program_code, const ProgramCode& program_code_b, | 234 | const ProgramCode& code, const ProgramCode& code_b, |
| 258 | const ProgramVariant& variant, ConstBufferLocker& locker, | 235 | ConstBufferLocker& locker, const ProgramVariant& variant, |
| 259 | bool hint_retrievable = false) { | 236 | bool hint_retrievable = false) { |
| 260 | LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type)); | 237 | LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, shader_type)); |
| 261 | 238 | ||
| 262 | const bool is_compute = program_type == ProgramType::Compute; | 239 | const bool is_compute = shader_type == ShaderType::Compute; |
| 263 | const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | 240 | const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; |
| 264 | const ShaderIR ir(program_code, main_offset, COMPILER_SETTINGS, locker); | 241 | const ShaderIR ir(code, main_offset, COMPILER_SETTINGS, locker); |
| 265 | std::optional<ShaderIR> ir_b; | 242 | std::optional<ShaderIR> ir_b; |
| 266 | if (!program_code_b.empty()) { | 243 | if (!code_b.empty()) { |
| 267 | ir_b.emplace(program_code_b, main_offset, COMPILER_SETTINGS, locker); | 244 | ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker); |
| 268 | } | 245 | } |
| 269 | const auto entries = GLShader::GetEntries(ir); | 246 | const auto entries = GLShader::GetEntries(ir); |
| 270 | 247 | ||
| 271 | auto base_bindings{variant.base_bindings}; | ||
| 272 | const auto primitive_mode{variant.primitive_mode}; | ||
| 273 | const auto texture_buffer_usage{variant.texture_buffer_usage}; | ||
| 274 | |||
| 275 | std::string source = fmt::format(R"(// {} | 248 | std::string source = fmt::format(R"(// {} |
| 276 | #version 430 core | 249 | #version 430 core |
| 277 | #extension GL_ARB_separate_shader_objects : enable | 250 | #extension GL_ARB_separate_shader_objects : enable |
| 278 | )", | 251 | )", |
| 279 | GetShaderId(unique_identifier, program_type)); | 252 | GetShaderId(unique_identifier, shader_type)); |
| 280 | if (is_compute) { | ||
| 281 | source += "#extension GL_ARB_compute_variable_group_size : require\n"; | ||
| 282 | } | ||
| 283 | if (device.HasShaderBallot()) { | 253 | if (device.HasShaderBallot()) { |
| 284 | source += "#extension GL_ARB_shader_ballot : require\n"; | 254 | source += "#extension GL_ARB_shader_ballot : require\n"; |
| 285 | } | 255 | } |
| @@ -296,54 +266,35 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramTy | |||
| 296 | } | 266 | } |
| 297 | source += '\n'; | 267 | source += '\n'; |
| 298 | 268 | ||
| 299 | if (!is_compute) { | 269 | if (shader_type == ShaderType::Geometry) { |
| 300 | source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | 270 | const auto [glsl_topology, debug_name, max_vertices] = |
| 301 | } | 271 | GetPrimitiveDescription(variant.primitive_mode); |
| 302 | 272 | ||
| 303 | for (const auto& cbuf : entries.const_buffers) { | 273 | source += fmt::format("layout ({}) in;\n\n", glsl_topology); |
| 304 | source += | 274 | source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices); |
| 305 | fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++); | ||
| 306 | } | 275 | } |
| 307 | for (const auto& gmem : entries.global_memory_entries) { | 276 | if (shader_type == ShaderType::Compute) { |
| 308 | source += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(), | ||
| 309 | gmem.GetCbufOffset(), base_bindings.gmem++); | ||
| 310 | } | ||
| 311 | for (const auto& sampler : entries.samplers) { | ||
| 312 | source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(), | ||
| 313 | base_bindings.sampler++); | ||
| 314 | } | ||
| 315 | for (const auto& image : entries.images) { | ||
| 316 | source += | 277 | source += |
| 317 | fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++); | 278 | fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n", |
| 318 | } | 279 | variant.block_x, variant.block_y, variant.block_z); |
| 319 | 280 | ||
| 320 | // Transform 1D textures to texture samplers by declaring its preprocessor macros. | 281 | if (variant.shared_memory_size > 0) { |
| 321 | for (std::size_t i = 0; i < texture_buffer_usage.size(); ++i) { | 282 | // TODO(Rodrigo): We should divide by four here, but having a larger shared memory pool |
| 322 | if (!texture_buffer_usage.test(i)) { | 283 | // avoids out of bound stores. Find out why shared memory size is being invalid. |
| 323 | continue; | 284 | source += fmt::format("shared uint smem[{}];", variant.shared_memory_size); |
| 324 | } | 285 | } |
| 325 | source += fmt::format("#define SAMPLER_{}_IS_BUFFER\n", i); | ||
| 326 | } | ||
| 327 | if (texture_buffer_usage.any()) { | ||
| 328 | source += '\n'; | ||
| 329 | } | ||
| 330 | 286 | ||
| 331 | if (program_type == ProgramType::Geometry) { | 287 | if (variant.local_memory_size > 0) { |
| 332 | const auto [glsl_topology, debug_name, max_vertices] = | 288 | source += fmt::format("#define LOCAL_MEMORY_SIZE {}", |
| 333 | GetPrimitiveDescription(primitive_mode); | 289 | Common::AlignUp(variant.local_memory_size, 4) / 4); |
| 334 | 290 | } | |
| 335 | source += "layout (" + std::string(glsl_topology) + ") in;\n\n"; | ||
| 336 | source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; | ||
| 337 | } | ||
| 338 | if (program_type == ProgramType::Compute) { | ||
| 339 | source += "layout (local_size_variable) in;\n"; | ||
| 340 | } | 291 | } |
| 341 | 292 | ||
| 342 | source += '\n'; | 293 | source += '\n'; |
| 343 | source += GenerateGLSL(device, program_type, ir, ir_b); | 294 | source += GenerateGLSL(device, shader_type, ir, ir_b); |
| 344 | 295 | ||
| 345 | OGLShader shader; | 296 | OGLShader shader; |
| 346 | shader.Create(source.c_str(), GetShaderType(program_type)); | 297 | shader.Create(source.c_str(), GetGLShaderType(shader_type)); |
| 347 | 298 | ||
| 348 | auto program = std::make_shared<OGLProgram>(); | 299 | auto program = std::make_shared<OGLProgram>(); |
| 349 | program->Create(true, hint_retrievable, shader.handle); | 300 | program->Create(true, hint_retrievable, shader.handle); |
| @@ -366,18 +317,16 @@ std::unordered_set<GLenum> GetSupportedFormats() { | |||
| 366 | 317 | ||
| 367 | } // Anonymous namespace | 318 | } // Anonymous namespace |
| 368 | 319 | ||
| 369 | CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, | 320 | CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type, |
| 370 | GLShader::ShaderEntries entries, ProgramCode program_code, | 321 | GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b) |
| 371 | ProgramCode program_code_b) | 322 | : RasterizerCacheObject{params.host_ptr}, system{params.system}, disk_cache{params.disk_cache}, |
| 372 | : RasterizerCacheObject{params.host_ptr}, system{params.system}, | 323 | device{params.device}, cpu_addr{params.cpu_addr}, unique_identifier{params.unique_identifier}, |
| 373 | disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, | 324 | shader_type{shader_type}, entries{entries}, code{std::move(code)}, code_b{std::move(code_b)} { |
| 374 | unique_identifier{params.unique_identifier}, program_type{program_type}, entries{entries}, | ||
| 375 | program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} { | ||
| 376 | if (!params.precompiled_variants) { | 325 | if (!params.precompiled_variants) { |
| 377 | return; | 326 | return; |
| 378 | } | 327 | } |
| 379 | for (const auto& pair : *params.precompiled_variants) { | 328 | for (const auto& pair : *params.precompiled_variants) { |
| 380 | auto locker = MakeLocker(system, program_type); | 329 | auto locker = MakeLocker(system, shader_type); |
| 381 | const auto& usage = pair->first; | 330 | const auto& usage = pair->first; |
| 382 | FillLocker(*locker, usage); | 331 | FillLocker(*locker, usage); |
| 383 | 332 | ||
| @@ -398,94 +347,83 @@ CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_t | |||
| 398 | } | 347 | } |
| 399 | 348 | ||
| 400 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | 349 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, |
| 401 | Maxwell::ShaderProgram program_type, | 350 | Maxwell::ShaderProgram program_type, ProgramCode code, |
| 402 | ProgramCode program_code, ProgramCode program_code_b) { | 351 | ProgramCode code_b) { |
| 403 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw( | 352 | const auto shader_type = GetShaderType(program_type); |
| 404 | params.unique_identifier, GetProgramType(program_type), program_code, program_code_b)); | 353 | params.disk_cache.SaveRaw( |
| 405 | 354 | ShaderDiskCacheRaw(params.unique_identifier, shader_type, code, code_b)); | |
| 406 | ConstBufferLocker locker(GetEnginesShaderType(GetProgramType(program_type)), | 355 | |
| 407 | params.system.GPU().Maxwell3D()); | 356 | ConstBufferLocker locker(shader_type, params.system.GPU().Maxwell3D()); |
| 408 | const ShaderIR ir(program_code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); | 357 | const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); |
| 409 | // TODO(Rodrigo): Handle VertexA shaders | 358 | // TODO(Rodrigo): Handle VertexA shaders |
| 410 | // std::optional<ShaderIR> ir_b; | 359 | // std::optional<ShaderIR> ir_b; |
| 411 | // if (!program_code_b.empty()) { | 360 | // if (!code_b.empty()) { |
| 412 | // ir_b.emplace(program_code_b, STAGE_MAIN_OFFSET); | 361 | // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); |
| 413 | // } | 362 | // } |
| 414 | return std::shared_ptr<CachedShader>( | 363 | return std::shared_ptr<CachedShader>(new CachedShader( |
| 415 | new CachedShader(params, GetProgramType(program_type), GLShader::GetEntries(ir), | 364 | params, shader_type, GLShader::GetEntries(ir), std::move(code), std::move(code_b))); |
| 416 | std::move(program_code), std::move(program_code_b))); | ||
| 417 | } | 365 | } |
| 418 | 366 | ||
| 419 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { | 367 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { |
| 420 | params.disk_cache.SaveRaw( | 368 | params.disk_cache.SaveRaw( |
| 421 | ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, code)); | 369 | ShaderDiskCacheRaw(params.unique_identifier, ShaderType::Compute, code)); |
| 422 | 370 | ||
| 423 | ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute, | 371 | ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute, |
| 424 | params.system.GPU().KeplerCompute()); | 372 | params.system.GPU().KeplerCompute()); |
| 425 | const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); | 373 | const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); |
| 426 | return std::shared_ptr<CachedShader>(new CachedShader( | 374 | return std::shared_ptr<CachedShader>(new CachedShader( |
| 427 | params, ProgramType::Compute, GLShader::GetEntries(ir), std::move(code), {})); | 375 | params, ShaderType::Compute, GLShader::GetEntries(ir), std::move(code), {})); |
| 428 | } | 376 | } |
| 429 | 377 | ||
| 430 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, | 378 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, |
| 431 | const UnspecializedShader& unspecialized) { | 379 | const UnspecializedShader& unspecialized) { |
| 432 | return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.program_type, | 380 | return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.type, |
| 433 | unspecialized.entries, unspecialized.code, | 381 | unspecialized.entries, unspecialized.code, |
| 434 | unspecialized.code_b)); | 382 | unspecialized.code_b)); |
| 435 | } | 383 | } |
| 436 | 384 | ||
| 437 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { | 385 | GLuint CachedShader::GetHandle(const ProgramVariant& variant) { |
| 438 | UpdateVariant(); | 386 | EnsureValidLockerVariant(); |
| 439 | 387 | ||
| 440 | const auto [entry, is_cache_miss] = curr_variant->programs.try_emplace(variant); | 388 | const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant); |
| 441 | auto& program = entry->second; | 389 | auto& program = entry->second; |
| 442 | if (is_cache_miss) { | 390 | if (!is_cache_miss) { |
| 443 | program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b, | 391 | return program->handle; |
| 444 | variant, *curr_variant->locker); | ||
| 445 | disk_cache.SaveUsage(GetUsage(variant, *curr_variant->locker)); | ||
| 446 | |||
| 447 | LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); | ||
| 448 | } | 392 | } |
| 449 | 393 | ||
| 450 | auto base_bindings = variant.base_bindings; | 394 | program = BuildShader(device, unique_identifier, shader_type, code, code_b, |
| 451 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()); | 395 | *curr_locker_variant->locker, variant); |
| 452 | if (program_type != ProgramType::Compute) { | 396 | disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker)); |
| 453 | base_bindings.cbuf += STAGE_RESERVED_UBOS; | ||
| 454 | } | ||
| 455 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); | ||
| 456 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); | ||
| 457 | 397 | ||
| 458 | return {program->handle, base_bindings}; | 398 | LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); |
| 399 | return program->handle; | ||
| 459 | } | 400 | } |
| 460 | 401 | ||
| 461 | void CachedShader::UpdateVariant() { | 402 | bool CachedShader::EnsureValidLockerVariant() { |
| 462 | if (curr_variant && !curr_variant->locker->IsConsistent()) { | 403 | const auto previous_variant = curr_locker_variant; |
| 463 | curr_variant = nullptr; | 404 | if (curr_locker_variant && !curr_locker_variant->locker->IsConsistent()) { |
| 405 | curr_locker_variant = nullptr; | ||
| 464 | } | 406 | } |
| 465 | if (!curr_variant) { | 407 | if (!curr_locker_variant) { |
| 466 | for (auto& variant : locker_variants) { | 408 | for (auto& variant : locker_variants) { |
| 467 | if (variant->locker->IsConsistent()) { | 409 | if (variant->locker->IsConsistent()) { |
| 468 | curr_variant = variant.get(); | 410 | curr_locker_variant = variant.get(); |
| 469 | } | 411 | } |
| 470 | } | 412 | } |
| 471 | } | 413 | } |
| 472 | if (!curr_variant) { | 414 | if (!curr_locker_variant) { |
| 473 | auto& new_variant = locker_variants.emplace_back(); | 415 | auto& new_variant = locker_variants.emplace_back(); |
| 474 | new_variant = std::make_unique<LockerVariant>(); | 416 | new_variant = std::make_unique<LockerVariant>(); |
| 475 | new_variant->locker = MakeLocker(system, program_type); | 417 | new_variant->locker = MakeLocker(system, shader_type); |
| 476 | curr_variant = new_variant.get(); | 418 | curr_locker_variant = new_variant.get(); |
| 477 | } | 419 | } |
| 420 | return previous_variant == curr_locker_variant; | ||
| 478 | } | 421 | } |
| 479 | 422 | ||
| 480 | ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, | 423 | ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, |
| 481 | const ConstBufferLocker& locker) const { | 424 | const ConstBufferLocker& locker) const { |
| 482 | ShaderDiskCacheUsage usage; | 425 | return ShaderDiskCacheUsage{unique_identifier, variant, locker.GetKeys(), |
| 483 | usage.unique_identifier = unique_identifier; | 426 | locker.GetBoundSamplers(), locker.GetBindlessSamplers()}; |
| 484 | usage.variant = variant; | ||
| 485 | usage.keys = locker.GetKeys(); | ||
| 486 | usage.bound_samplers = locker.GetBoundSamplers(); | ||
| 487 | usage.bindless_samplers = locker.GetBindlessSamplers(); | ||
| 488 | return usage; | ||
| 489 | } | 427 | } |
| 490 | 428 | ||
| 491 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, | 429 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, |
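GetHandle now returns a bare GLuint and re-validates the locker variant on every call, with EnsureValidLockerVariant reporting whether the current variant survived. A standalone sketch of that find-or-create pattern over a vector of owned variants, with stand-in types in place of ConstBufferLocker and LockerVariant:

// Illustrative sketch only, not part of the diff.
#include <memory>
#include <vector>

// Stand-in for ConstBufferLocker: only the consistency query matters here.
struct LockerSketch {
    bool consistent = true;
    bool IsConsistent() const { return consistent; }
};

struct VariantSketch {
    std::unique_ptr<LockerSketch> locker;
    // per-variant compiled programs would live here
};

struct ShaderSketch {
    VariantSketch* current = nullptr;
    std::vector<std::unique_ptr<VariantSketch>> variants;

    // Mirrors EnsureValidLockerVariant: drop an inconsistent current variant,
    // reuse any still-consistent one, otherwise create a fresh variant.
    // Returns true when the current variant did not change.
    bool EnsureValid() {
        VariantSketch* const previous = current;
        if (current && !current->locker->IsConsistent()) {
            current = nullptr;
        }
        if (!current) {
            for (auto& variant : variants) {
                if (variant->locker->IsConsistent()) {
                    current = variant.get();
                }
            }
        }
        if (!current) {
            auto& fresh = variants.emplace_back(std::make_unique<VariantSketch>());
            fresh->locker = std::make_unique<LockerSketch>();
            current = fresh.get();
        }
        return previous == current;
    }
};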
| @@ -544,11 +482,12 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 544 | } | 482 | } |
| 545 | } | 483 | } |
| 546 | if (!shader) { | 484 | if (!shader) { |
| 547 | auto locker{MakeLocker(system, unspecialized.program_type)}; | 485 | auto locker{MakeLocker(system, unspecialized.type)}; |
| 548 | FillLocker(*locker, usage); | 486 | FillLocker(*locker, usage); |
| 549 | shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type, | 487 | |
| 550 | unspecialized.code, unspecialized.code_b, usage.variant, | 488 | shader = BuildShader(device, usage.unique_identifier, unspecialized.type, |
| 551 | *locker, true); | 489 | unspecialized.code, unspecialized.code_b, *locker, |
| 490 | usage.variant, true); | ||
| 552 | } | 491 | } |
| 553 | 492 | ||
| 554 | std::scoped_lock lock{mutex}; | 493 | std::scoped_lock lock{mutex}; |
| @@ -651,7 +590,7 @@ bool ShaderCacheOpenGL::GenerateUnspecializedShaders( | |||
| 651 | const auto& raw{raws[i]}; | 590 | const auto& raw{raws[i]}; |
| 652 | const u64 unique_identifier{raw.GetUniqueIdentifier()}; | 591 | const u64 unique_identifier{raw.GetUniqueIdentifier()}; |
| 653 | const u64 calculated_hash{ | 592 | const u64 calculated_hash{ |
| 654 | GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())}; | 593 | GetUniqueIdentifier(raw.GetType(), raw.HasProgramA(), raw.GetCode(), raw.GetCodeB())}; |
| 655 | if (unique_identifier != calculated_hash) { | 594 | if (unique_identifier != calculated_hash) { |
| 656 | LOG_ERROR(Render_OpenGL, | 595 | LOG_ERROR(Render_OpenGL, |
| 657 | "Invalid hash in entry={:016x} (obtained hash={:016x}) - " | 596 | "Invalid hash in entry={:016x} (obtained hash={:016x}) - " |
| @@ -662,9 +601,9 @@ bool ShaderCacheOpenGL::GenerateUnspecializedShaders( | |||
| 662 | } | 601 | } |
| 663 | 602 | ||
| 664 | const u32 main_offset = | 603 | const u32 main_offset = |
| 665 | raw.GetProgramType() == ProgramType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | 604 | raw.GetType() == ShaderType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; |
| 666 | ConstBufferLocker locker(GetEnginesShaderType(raw.GetProgramType())); | 605 | ConstBufferLocker locker(raw.GetType()); |
| 667 | const ShaderIR ir(raw.GetProgramCode(), main_offset, COMPILER_SETTINGS, locker); | 606 | const ShaderIR ir(raw.GetCode(), main_offset, COMPILER_SETTINGS, locker); |
| 668 | // TODO(Rodrigo): Handle VertexA shaders | 607 | // TODO(Rodrigo): Handle VertexA shaders |
| 669 | // std::optional<ShaderIR> ir_b; | 608 | // std::optional<ShaderIR> ir_b; |
| 670 | // if (raw.HasProgramA()) { | 609 | // if (raw.HasProgramA()) { |
| @@ -673,9 +612,9 @@ bool ShaderCacheOpenGL::GenerateUnspecializedShaders( | |||
| 673 | 612 | ||
| 674 | UnspecializedShader unspecialized; | 613 | UnspecializedShader unspecialized; |
| 675 | unspecialized.entries = GLShader::GetEntries(ir); | 614 | unspecialized.entries = GLShader::GetEntries(ir); |
| 676 | unspecialized.program_type = raw.GetProgramType(); | 615 | unspecialized.type = raw.GetType(); |
| 677 | unspecialized.code = raw.GetProgramCode(); | 616 | unspecialized.code = raw.GetCode(); |
| 678 | unspecialized.code_b = raw.GetProgramCodeB(); | 617 | unspecialized.code_b = raw.GetCodeB(); |
| 679 | unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized); | 618 | unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized); |
| 680 | 619 | ||
| 681 | if (callback) { | 620 | if (callback) { |
| @@ -708,7 +647,8 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 708 | code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b)); | 647 | code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b)); |
| 709 | } | 648 | } |
| 710 | 649 | ||
| 711 | const auto unique_identifier = GetUniqueIdentifier(GetProgramType(program), code, code_b); | 650 | const auto unique_identifier = GetUniqueIdentifier( |
| 651 | GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); | ||
| 712 | const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); | 652 | const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); |
| 713 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; | 653 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; |
| 714 | const ShaderParameters params{system, disk_cache, precompiled_variants, device, | 654 | const ShaderParameters params{system, disk_cache, precompiled_variants, device, |
| @@ -736,7 +676,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | |||
| 736 | 676 | ||
| 737 | // No kernel found - create a new one | 677 | // No kernel found - create a new one |
| 738 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; | 678 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; |
| 739 | const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; | 679 | const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code, {})}; |
| 740 | const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); | 680 | const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); |
| 741 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; | 681 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; |
| 742 | const ShaderParameters params{system, disk_cache, precompiled_variants, device, | 682 | const ShaderParameters params{system, disk_cache, precompiled_variants, device, |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 6bd7c9cf1..7b1470db3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <glad/glad.h> | 17 | #include <glad/glad.h> |
| 18 | 18 | ||
| 19 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 20 | #include "video_core/engines/shader_type.h" | ||
| 20 | #include "video_core/rasterizer_cache.h" | 21 | #include "video_core/rasterizer_cache.h" |
| 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 22 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 22 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 23 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| @@ -47,7 +48,7 @@ using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>; | |||
| 47 | 48 | ||
| 48 | struct UnspecializedShader { | 49 | struct UnspecializedShader { |
| 49 | GLShader::ShaderEntries entries; | 50 | GLShader::ShaderEntries entries; |
| 50 | ProgramType program_type; | 51 | Tegra::Engines::ShaderType type; |
| 51 | ProgramCode code; | 52 | ProgramCode code; |
| 52 | ProgramCode code_b; | 53 | ProgramCode code_b; |
| 53 | }; | 54 | }; |
| @@ -77,7 +78,7 @@ public: | |||
| 77 | } | 78 | } |
| 78 | 79 | ||
| 79 | std::size_t GetSizeInBytes() const override { | 80 | std::size_t GetSizeInBytes() const override { |
| 80 | return program_code.size() * sizeof(u64); | 81 | return code.size() * sizeof(u64); |
| 81 | } | 82 | } |
| 82 | 83 | ||
| 83 | /// Gets the shader entries for the shader | 84 | /// Gets the shader entries for the shader |
| @@ -86,7 +87,7 @@ public: | |||
| 86 | } | 87 | } |
| 87 | 88 | ||
| 88 | /// Gets the GL program handle for the shader | 89 | /// Gets the GL program handle for the shader |
| 89 | std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); | 90 | GLuint GetHandle(const ProgramVariant& variant); |
| 90 | 91 | ||
| 91 | private: | 92 | private: |
| 92 | struct LockerVariant { | 93 | struct LockerVariant { |
| @@ -94,11 +95,11 @@ private: | |||
| 94 | std::unordered_map<ProgramVariant, CachedProgram> programs; | 95 | std::unordered_map<ProgramVariant, CachedProgram> programs; |
| 95 | }; | 96 | }; |
| 96 | 97 | ||
| 97 | explicit CachedShader(const ShaderParameters& params, ProgramType program_type, | 98 | explicit CachedShader(const ShaderParameters& params, Tegra::Engines::ShaderType shader_type, |
| 98 | GLShader::ShaderEntries entries, ProgramCode program_code, | 99 | GLShader::ShaderEntries entries, ProgramCode program_code, |
| 99 | ProgramCode program_code_b); | 100 | ProgramCode program_code_b); |
| 100 | 101 | ||
| 101 | void UpdateVariant(); | 102 | bool EnsureValidLockerVariant(); |
| 102 | 103 | ||
| 103 | ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant, | 104 | ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant, |
| 104 | const VideoCommon::Shader::ConstBufferLocker& locker) const; | 105 | const VideoCommon::Shader::ConstBufferLocker& locker) const; |
| @@ -110,14 +111,14 @@ private: | |||
| 110 | VAddr cpu_addr{}; | 111 | VAddr cpu_addr{}; |
| 111 | 112 | ||
| 112 | u64 unique_identifier{}; | 113 | u64 unique_identifier{}; |
| 113 | ProgramType program_type{}; | 114 | Tegra::Engines::ShaderType shader_type{}; |
| 114 | 115 | ||
| 115 | GLShader::ShaderEntries entries; | 116 | GLShader::ShaderEntries entries; |
| 116 | 117 | ||
| 117 | ProgramCode program_code; | 118 | ProgramCode code; |
| 118 | ProgramCode program_code_b; | 119 | ProgramCode code_b; |
| 119 | 120 | ||
| 120 | LockerVariant* curr_variant = nullptr; | 121 | LockerVariant* curr_locker_variant = nullptr; |
| 121 | std::vector<std::unique_ptr<LockerVariant>> locker_variants; | 122 | std::vector<std::unique_ptr<LockerVariant>> locker_variants; |
| 122 | }; | 123 | }; |
| 123 | 124 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 4f2b49170..b17c4e703 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include "common/common_types.h" | 16 | #include "common/common_types.h" |
| 17 | #include "common/logging/log.h" | 17 | #include "common/logging/log.h" |
| 18 | #include "video_core/engines/maxwell_3d.h" | 18 | #include "video_core/engines/maxwell_3d.h" |
| 19 | #include "video_core/engines/shader_type.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_device.h" | 20 | #include "video_core/renderer_opengl/gl_device.h" |
| 20 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 21 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 21 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 22 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| @@ -27,6 +28,7 @@ namespace OpenGL::GLShader { | |||
| 27 | 28 | ||
| 28 | namespace { | 29 | namespace { |
| 29 | 30 | ||
| 31 | using Tegra::Engines::ShaderType; | ||
| 30 | using Tegra::Shader::Attribute; | 32 | using Tegra::Shader::Attribute; |
| 31 | using Tegra::Shader::AttributeUse; | 33 | using Tegra::Shader::AttributeUse; |
| 32 | using Tegra::Shader::Header; | 34 | using Tegra::Shader::Header; |
| @@ -41,6 +43,9 @@ using namespace VideoCommon::Shader; | |||
| 41 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 43 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 42 | using Operation = const OperationNode&; | 44 | using Operation = const OperationNode&; |
| 43 | 45 | ||
| 46 | class ASTDecompiler; | ||
| 47 | class ExprDecompiler; | ||
| 48 | |||
| 44 | enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; | 49 | enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; |
| 45 | 50 | ||
| 46 | struct TextureAoffi {}; | 51 | struct TextureAoffi {}; |
| @@ -223,7 +228,7 @@ private: | |||
| 223 | Type type{}; | 228 | Type type{}; |
| 224 | }; | 229 | }; |
| 225 | 230 | ||
| 226 | constexpr const char* GetTypeString(Type type) { | 231 | const char* GetTypeString(Type type) { |
| 227 | switch (type) { | 232 | switch (type) { |
| 228 | case Type::Bool: | 233 | case Type::Bool: |
| 229 | return "bool"; | 234 | return "bool"; |
| @@ -243,7 +248,7 @@ constexpr const char* GetTypeString(Type type) { | |||
| 243 | } | 248 | } |
| 244 | } | 249 | } |
| 245 | 250 | ||
| 246 | constexpr const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { | 251 | const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { |
| 247 | switch (image_type) { | 252 | switch (image_type) { |
| 248 | case Tegra::Shader::ImageType::Texture1D: | 253 | case Tegra::Shader::ImageType::Texture1D: |
| 249 | return "1D"; | 254 | return "1D"; |
| @@ -331,16 +336,13 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
| 331 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | 336 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); |
| 332 | } | 337 | } |
| 333 | 338 | ||
| 334 | constexpr bool IsVertexShader(ProgramType stage) { | 339 | [[deprecated]] constexpr bool IsVertexShader(ShaderType stage) { |
| 335 | return stage == ProgramType::VertexA || stage == ProgramType::VertexB; | 340 | return stage == ShaderType::Vertex; |
| 336 | } | 341 | } |
| 337 | 342 | ||
| 338 | class ASTDecompiler; | ||
| 339 | class ExprDecompiler; | ||
| 340 | |||
| 341 | class GLSLDecompiler final { | 343 | class GLSLDecompiler final { |
| 342 | public: | 344 | public: |
| 343 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage, | 345 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, |
| 344 | std::string suffix) | 346 | std::string suffix) |
| 345 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} | 347 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} |
| 346 | 348 | ||
| @@ -427,7 +429,7 @@ private: | |||
| 427 | } | 429 | } |
| 428 | 430 | ||
| 429 | void DeclareGeometry() { | 431 | void DeclareGeometry() { |
| 430 | if (stage != ProgramType::Geometry) { | 432 | if (stage != ShaderType::Geometry) { |
| 431 | return; | 433 | return; |
| 432 | } | 434 | } |
| 433 | 435 | ||
| @@ -510,10 +512,14 @@ private: | |||
| 510 | } | 512 | } |
| 511 | 513 | ||
| 512 | void DeclareLocalMemory() { | 514 | void DeclareLocalMemory() { |
| 513 | // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at | 515 | if (stage == ShaderType::Compute) { |
| 514 | // specialization time. | 516 | code.AddLine("#ifdef LOCAL_MEMORY_SIZE"); |
| 515 | const u64 local_memory_size = | 517 | code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory()); |
| 516 | stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize(); | 518 | code.AddLine("#endif"); |
| 519 | return; | ||
| 520 | } | ||
| 521 | |||
| 522 | const u64 local_memory_size = header.GetLocalMemorySize(); | ||
| 517 | if (local_memory_size == 0) { | 523 | if (local_memory_size == 0) { |
| 518 | return; | 524 | return; |
| 519 | } | 525 | } |
| @@ -522,13 +528,6 @@ private: | |||
| 522 | code.AddNewLine(); | 528 | code.AddNewLine(); |
| 523 | } | 529 | } |
| 524 | 530 | ||
| 525 | void DeclareSharedMemory() { | ||
| 526 | if (stage != ProgramType::Compute) { | ||
| 527 | return; | ||
| 528 | } | ||
| 529 | code.AddLine("shared uint {}[];", GetSharedMemory()); | ||
| 530 | } | ||
| 531 | |||
| 532 | void DeclareInternalFlags() { | 531 | void DeclareInternalFlags() { |
| 533 | for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { | 532 | for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { |
| 534 | const auto flag_code = static_cast<InternalFlag>(flag); | 533 | const auto flag_code = static_cast<InternalFlag>(flag); |
| @@ -578,12 +577,12 @@ private: | |||
| 578 | const u32 location{GetGenericAttributeIndex(index)}; | 577 | const u32 location{GetGenericAttributeIndex(index)}; |
| 579 | 578 | ||
| 580 | std::string name{GetInputAttribute(index)}; | 579 | std::string name{GetInputAttribute(index)}; |
| 581 | if (stage == ProgramType::Geometry) { | 580 | if (stage == ShaderType::Geometry) { |
| 582 | name = "gs_" + name + "[]"; | 581 | name = "gs_" + name + "[]"; |
| 583 | } | 582 | } |
| 584 | 583 | ||
| 585 | std::string suffix; | 584 | std::string suffix; |
| 586 | if (stage == ProgramType::Fragment) { | 585 | if (stage == ShaderType::Fragment) { |
| 587 | const auto input_mode{header.ps.GetAttributeUse(location)}; | 586 | const auto input_mode{header.ps.GetAttributeUse(location)}; |
| 588 | if (skip_unused && input_mode == AttributeUse::Unused) { | 587 | if (skip_unused && input_mode == AttributeUse::Unused) { |
| 589 | return; | 588 | return; |
| @@ -595,7 +594,7 @@ private: | |||
| 595 | } | 594 | } |
| 596 | 595 | ||
| 597 | void DeclareOutputAttributes() { | 596 | void DeclareOutputAttributes() { |
| 598 | if (ir.HasPhysicalAttributes() && stage != ProgramType::Fragment) { | 597 | if (ir.HasPhysicalAttributes() && stage != ShaderType::Fragment) { |
| 599 | for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { | 598 | for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { |
| 600 | DeclareOutputAttribute(ToGenericAttribute(i)); | 599 | DeclareOutputAttribute(ToGenericAttribute(i)); |
| 601 | } | 600 | } |
| @@ -620,9 +619,9 @@ private: | |||
| 620 | } | 619 | } |
| 621 | 620 | ||
| 622 | void DeclareConstantBuffers() { | 621 | void DeclareConstantBuffers() { |
| 623 | for (const auto& entry : ir.GetConstantBuffers()) { | 622 | u32 binding = device.GetBaseBindings(stage).uniform_buffer; |
| 624 | const auto [index, size] = entry; | 623 | for (const auto& [index, cbuf] : ir.GetConstantBuffers()) { |
| 625 | code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index, | 624 | code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, |
| 626 | GetConstBufferBlock(index)); | 625 | GetConstBufferBlock(index)); |
| 627 | code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); | 626 | code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); |
| 628 | code.AddLine("}};"); | 627 | code.AddLine("}};"); |
| @@ -631,9 +630,8 @@ private: | |||
| 631 | } | 630 | } |
| 632 | 631 | ||
| 633 | void DeclareGlobalMemory() { | 632 | void DeclareGlobalMemory() { |
| 634 | for (const auto& gmem : ir.GetGlobalMemory()) { | 633 | u32 binding = device.GetBaseBindings(stage).shader_storage_buffer; |
| 635 | const auto& [base, usage] = gmem; | 634 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { |
| 636 | |||
| 637 | // Since we don't know how the shader will use the shader, hint the driver to disable as | 635 | // Since we don't know how the shader will use the shader, hint the driver to disable as |
| 638 | // much optimizations as possible | 636 | // much optimizations as possible |
| 639 | std::string qualifier = "coherent volatile"; | 637 | std::string qualifier = "coherent volatile"; |
| @@ -643,8 +641,8 @@ private: | |||
| 643 | qualifier += " writeonly"; | 641 | qualifier += " writeonly"; |
| 644 | } | 642 | } |
| 645 | 643 | ||
| 646 | code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{", | 644 | code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier, |
| 647 | base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base)); | 645 | GetGlobalMemoryBlock(base)); |
| 648 | code.AddLine(" uint {}[];", GetGlobalMemory(base)); | 646 | code.AddLine(" uint {}[];", GetGlobalMemory(base)); |
| 649 | code.AddLine("}};"); | 647 | code.AddLine("}};"); |
| 650 | code.AddNewLine(); | 648 | code.AddNewLine(); |
| @@ -652,15 +650,17 @@ private: | |||
| 652 | } | 650 | } |
| 653 | 651 | ||
| 654 | void DeclareSamplers() { | 652 | void DeclareSamplers() { |
| 655 | const auto& samplers = ir.GetSamplers(); | 653 | u32 binding = device.GetBaseBindings(stage).sampler; |
| 656 | for (const auto& sampler : samplers) { | 654 | for (const auto& sampler : ir.GetSamplers()) { |
| 657 | const std::string name{GetSampler(sampler)}; | 655 | const std::string name = GetSampler(sampler); |
| 658 | const std::string description{"layout (binding = SAMPLER_BINDING_" + | 656 | const std::string description = fmt::format("layout (binding = {}) uniform", binding++); |
| 659 | std::to_string(sampler.GetIndex()) + ") uniform"}; | 657 | |
| 660 | std::string sampler_type = [&]() { | 658 | std::string sampler_type = [&]() { |
| 659 | if (sampler.IsBuffer()) { | ||
| 660 | return "samplerBuffer"; | ||
| 661 | } | ||
| 661 | switch (sampler.GetType()) { | 662 | switch (sampler.GetType()) { |
| 662 | case Tegra::Shader::TextureType::Texture1D: | 663 | case Tegra::Shader::TextureType::Texture1D: |
| 663 | // Special cased, read below. | ||
| 664 | return "sampler1D"; | 664 | return "sampler1D"; |
| 665 | case Tegra::Shader::TextureType::Texture2D: | 665 | case Tegra::Shader::TextureType::Texture2D: |
| 666 | return "sampler2D"; | 666 | return "sampler2D"; |
| @@ -680,21 +680,9 @@ private: | |||
| 680 | sampler_type += "Shadow"; | 680 | sampler_type += "Shadow"; |
| 681 | } | 681 | } |
| 682 | 682 | ||
| 683 | if (sampler.GetType() == Tegra::Shader::TextureType::Texture1D) { | 683 | code.AddLine("{} {} {};", description, sampler_type, name); |
| 684 | // 1D textures can be aliased to texture buffers, hide the declarations behind a | ||
| 685 | // preprocessor flag and use one or the other from the GPU state. This has to be | ||
| 686 | // done because shaders don't have enough information to determine the texture type. | ||
| 687 | EmitIfdefIsBuffer(sampler); | ||
| 688 | code.AddLine("{} samplerBuffer {};", description, name); | ||
| 689 | code.AddLine("#else"); | ||
| 690 | code.AddLine("{} {} {};", description, sampler_type, name); | ||
| 691 | code.AddLine("#endif"); | ||
| 692 | } else { | ||
| 693 | // The other texture types (2D, 3D and cubes) don't have this issue. | ||
| 694 | code.AddLine("{} {} {};", description, sampler_type, name); | ||
| 695 | } | ||
| 696 | } | 684 | } |
| 697 | if (!samplers.empty()) { | 685 | if (!ir.GetSamplers().empty()) { |
| 698 | code.AddNewLine(); | 686 | code.AddNewLine(); |
| 699 | } | 687 | } |
| 700 | } | 688 | } |
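With sampler.IsBuffer() available from the IR, texture-buffer aliasing of 1D textures no longer needs the SAMPLER_x_IS_BUFFER preprocessor dance; the GLSL sampler type is picked directly. A standalone sketch of that type selection, with a stand-in for the IR sampler entry and only the texture types visible in this hunk plus the common ones:

// Illustrative sketch only, not part of the diff.
#include <string>

// Stand-in for the sampler entry fields the new code consults.
struct SamplerSketch {
    enum class Type { Texture1D, Texture2D, Texture3D, TextureCube };
    bool is_buffer = false;
    bool is_shadow = false;
    Type type = Type::Texture2D;
};

// Mirrors the new selection: buffer samplers short-circuit to samplerBuffer,
// everything else maps by texture type and gets a Shadow suffix where needed.
std::string GlslSamplerType(const SamplerSketch& sampler) {
    if (sampler.is_buffer) {
        return "samplerBuffer";
    }
    std::string type;
    switch (sampler.type) {
    case SamplerSketch::Type::Texture1D:
        type = "sampler1D";
        break;
    case SamplerSketch::Type::Texture2D:
        type = "sampler2D";
        break;
    case SamplerSketch::Type::Texture3D:
        type = "sampler3D";
        break;
    case SamplerSketch::Type::TextureCube:
        type = "samplerCube";
        break;
    }
    if (sampler.is_shadow) {
        type += "Shadow";
    }
    return type;
}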
| @@ -717,7 +705,7 @@ private: | |||
| 717 | constexpr u32 element_stride = 4; | 705 | constexpr u32 element_stride = 4; |
| 718 | const u32 address{generic_base + index * generic_stride + element * element_stride}; | 706 | const u32 address{generic_base + index * generic_stride + element * element_stride}; |
| 719 | 707 | ||
| 720 | const bool declared = stage != ProgramType::Fragment || | 708 | const bool declared = stage != ShaderType::Fragment || |
| 721 | header.ps.GetAttributeUse(index) != AttributeUse::Unused; | 709 | header.ps.GetAttributeUse(index) != AttributeUse::Unused; |
| 722 | const std::string value = | 710 | const std::string value = |
| 723 | declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f"; | 711 | declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f"; |
| @@ -734,8 +722,8 @@ private: | |||
| 734 | } | 722 | } |
| 735 | 723 | ||
| 736 | void DeclareImages() { | 724 | void DeclareImages() { |
| 737 | const auto& images{ir.GetImages()}; | 725 | u32 binding = device.GetBaseBindings(stage).image; |
| 738 | for (const auto& image : images) { | 726 | for (const auto& image : ir.GetImages()) { |
| 739 | std::string qualifier = "coherent volatile"; | 727 | std::string qualifier = "coherent volatile"; |
| 740 | if (image.IsRead() && !image.IsWritten()) { | 728 | if (image.IsRead() && !image.IsWritten()) { |
| 741 | qualifier += " readonly"; | 729 | qualifier += " readonly"; |
| @@ -745,10 +733,10 @@ private: | |||
| 745 | 733 | ||
| 746 | const char* format = image.IsAtomic() ? "r32ui, " : ""; | 734 | const char* format = image.IsAtomic() ? "r32ui, " : ""; |
| 747 | const char* type_declaration = GetImageTypeDeclaration(image.GetType()); | 735 | const char* type_declaration = GetImageTypeDeclaration(image.GetType()); |
| 748 | code.AddLine("layout ({}binding = IMAGE_BINDING_{}) {} uniform uimage{} {};", format, | 736 | code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding++, |
| 749 | image.GetIndex(), qualifier, type_declaration, GetImage(image)); | 737 | qualifier, type_declaration, GetImage(image)); |
| 750 | } | 738 | } |
| 751 | if (!images.empty()) { | 739 | if (!ir.GetImages().empty()) { |
| 752 | code.AddNewLine(); | 740 | code.AddNewLine(); |
| 753 | } | 741 | } |
| 754 | } | 742 | } |
| @@ -809,7 +797,7 @@ private: | |||
| 809 | } | 797 | } |
| 810 | 798 | ||
| 811 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { | 799 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { |
| 812 | UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry, | 800 | UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderType::Geometry, |
| 813 | "Physical attributes in geometry shaders are not implemented"); | 801 | "Physical attributes in geometry shaders are not implemented"); |
| 814 | if (abuf->IsPhysicalBuffer()) { | 802 | if (abuf->IsPhysicalBuffer()) { |
| 815 | return {fmt::format("ReadPhysicalAttribute({})", | 803 | return {fmt::format("ReadPhysicalAttribute({})", |
| @@ -868,18 +856,13 @@ private: | |||
| 868 | } | 856 | } |
| 869 | 857 | ||
| 870 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | 858 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { |
| 871 | if (stage == ProgramType::Compute) { | ||
| 872 | LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); | ||
| 873 | } | ||
| 874 | return { | 859 | return { |
| 875 | fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), | 860 | fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), |
| 876 | Type::Uint}; | 861 | Type::Uint}; |
| 877 | } | 862 | } |
| 878 | 863 | ||
| 879 | if (const auto smem = std::get_if<SmemNode>(&*node)) { | 864 | if (const auto smem = std::get_if<SmemNode>(&*node)) { |
| 880 | return { | 865 | return {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; |
| 881 | fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()), | ||
| 882 | Type::Uint}; | ||
| 883 | } | 866 | } |
| 884 | 867 | ||
| 885 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { | 868 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { |
| @@ -909,7 +892,7 @@ private: | |||
| 909 | 892 | ||
| 910 | Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { | 893 | Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { |
| 911 | const auto GeometryPass = [&](std::string_view name) { | 894 | const auto GeometryPass = [&](std::string_view name) { |
| 912 | if (stage == ProgramType::Geometry && buffer) { | 895 | if (stage == ShaderType::Geometry && buffer) { |
| 913 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games | 896 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games |
| 914 | // set an 0x80000000 index for those and the shader fails to build. Find out why | 897 | // set an 0x80000000 index for those and the shader fails to build. Find out why |
| 915 | // this happens and what's its intent. | 898 | // this happens and what's its intent. |
| @@ -921,11 +904,11 @@ private: | |||
| 921 | switch (attribute) { | 904 | switch (attribute) { |
| 922 | case Attribute::Index::Position: | 905 | case Attribute::Index::Position: |
| 923 | switch (stage) { | 906 | switch (stage) { |
| 924 | case ProgramType::Geometry: | 907 | case ShaderType::Geometry: |
| 925 | return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(), | 908 | return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(), |
| 926 | GetSwizzle(element)), | 909 | GetSwizzle(element)), |
| 927 | Type::Float}; | 910 | Type::Float}; |
| 928 | case ProgramType::Fragment: | 911 | case ShaderType::Fragment: |
| 929 | return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)), | 912 | return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)), |
| 930 | Type::Float}; | 913 | Type::Float}; |
| 931 | default: | 914 | default: |
| @@ -959,7 +942,7 @@ private: | |||
| 959 | return {"0", Type::Int}; | 942 | return {"0", Type::Int}; |
| 960 | case Attribute::Index::FrontFacing: | 943 | case Attribute::Index::FrontFacing: |
| 961 | // TODO(Subv): Find out what the values are for the other elements. | 944 | // TODO(Subv): Find out what the values are for the other elements. |
| 962 | ASSERT(stage == ProgramType::Fragment); | 945 | ASSERT(stage == ShaderType::Fragment); |
| 963 | switch (element) { | 946 | switch (element) { |
| 964 | case 3: | 947 | case 3: |
| 965 | return {"(gl_FrontFacing ? -1 : 0)", Type::Int}; | 948 | return {"(gl_FrontFacing ? -1 : 0)", Type::Int}; |
| @@ -985,7 +968,7 @@ private: | |||
| 985 | // be found in fragment shaders, so we disable precise there. There are vertex shaders that | 968 | // be found in fragment shaders, so we disable precise there. There are vertex shaders that |
| 986 | // also fail to build but nobody seems to care about those. | 969 | // also fail to build but nobody seems to care about those. |
| 987 | // Note: Only bugged drivers will skip precise. | 970 | // Note: Only bugged drivers will skip precise. |
| 988 | const bool disable_precise = device.HasPreciseBug() && stage == ProgramType::Fragment; | 971 | const bool disable_precise = device.HasPreciseBug() && stage == ShaderType::Fragment; |
| 989 | 972 | ||
| 990 | std::string temporary = code.GenerateTemporary(); | 973 | std::string temporary = code.GenerateTemporary(); |
| 991 | code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type), | 974 | code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type), |
| @@ -1247,17 +1230,12 @@ private: | |||
| 1247 | } | 1230 | } |
| 1248 | target = std::move(*output); | 1231 | target = std::move(*output); |
| 1249 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | 1232 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { |
| 1250 | if (stage == ProgramType::Compute) { | ||
| 1251 | LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); | ||
| 1252 | } | ||
| 1253 | target = { | 1233 | target = { |
| 1254 | fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), | 1234 | fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()), |
| 1255 | Type::Uint}; | 1235 | Type::Uint}; |
| 1256 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { | 1236 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { |
| 1257 | ASSERT(stage == ProgramType::Compute); | 1237 | ASSERT(stage == ShaderType::Compute); |
| 1258 | target = { | 1238 | target = {fmt::format("smem[{} >> 2]", Visit(smem->GetAddress()).AsUint()), Type::Uint}; |
| 1259 | fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()), | ||
| 1260 | Type::Uint}; | ||
| 1261 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | 1239 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { |
| 1262 | const std::string real = Visit(gmem->GetRealAddress()).AsUint(); | 1240 | const std::string real = Visit(gmem->GetRealAddress()).AsUint(); |
| 1263 | const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); | 1241 | const std::string base = Visit(gmem->GetBaseAddress()).AsUint(); |
| @@ -1749,27 +1727,14 @@ private: | |||
| 1749 | expr += ", "; | 1727 | expr += ", "; |
| 1750 | } | 1728 | } |
| 1751 | 1729 | ||
| 1752 | // Store a copy of the expression without the lod to be used with texture buffers | 1730 | if (meta->lod && !meta->sampler.IsBuffer()) { |
| 1753 | std::string expr_buffer = expr; | ||
| 1754 | |||
| 1755 | if (meta->lod) { | ||
| 1756 | expr += ", "; | 1731 | expr += ", "; |
| 1757 | expr += Visit(meta->lod).AsInt(); | 1732 | expr += Visit(meta->lod).AsInt(); |
| 1758 | } | 1733 | } |
| 1759 | expr += ')'; | 1734 | expr += ')'; |
| 1760 | expr += GetSwizzle(meta->element); | 1735 | expr += GetSwizzle(meta->element); |
| 1761 | 1736 | ||
| 1762 | expr_buffer += ')'; | 1737 | return {std::move(expr), Type::Float}; |
| 1763 | expr_buffer += GetSwizzle(meta->element); | ||
| 1764 | |||
| 1765 | const std::string tmp{code.GenerateTemporary()}; | ||
| 1766 | EmitIfdefIsBuffer(meta->sampler); | ||
| 1767 | code.AddLine("float {} = {};", tmp, expr_buffer); | ||
| 1768 | code.AddLine("#else"); | ||
| 1769 | code.AddLine("float {} = {};", tmp, expr); | ||
| 1770 | code.AddLine("#endif"); | ||
| 1771 | |||
| 1772 | return {tmp, Type::Float}; | ||
| 1773 | } | 1738 | } |
| 1774 | 1739 | ||
| 1775 | Expression ImageLoad(Operation operation) { | 1740 | Expression ImageLoad(Operation operation) { |
| @@ -1837,7 +1802,7 @@ private: | |||
| 1837 | } | 1802 | } |
| 1838 | 1803 | ||
| 1839 | void PreExit() { | 1804 | void PreExit() { |
| 1840 | if (stage != ProgramType::Fragment) { | 1805 | if (stage != ShaderType::Fragment) { |
| 1841 | return; | 1806 | return; |
| 1842 | } | 1807 | } |
| 1843 | const auto& used_registers = ir.GetRegisters(); | 1808 | const auto& used_registers = ir.GetRegisters(); |
| @@ -1890,14 +1855,14 @@ private: | |||
| 1890 | } | 1855 | } |
| 1891 | 1856 | ||
| 1892 | Expression EmitVertex(Operation operation) { | 1857 | Expression EmitVertex(Operation operation) { |
| 1893 | ASSERT_MSG(stage == ProgramType::Geometry, | 1858 | ASSERT_MSG(stage == ShaderType::Geometry, |
| 1894 | "EmitVertex is expected to be used in a geometry shader."); | 1859 | "EmitVertex is expected to be used in a geometry shader."); |
| 1895 | code.AddLine("EmitVertex();"); | 1860 | code.AddLine("EmitVertex();"); |
| 1896 | return {}; | 1861 | return {}; |
| 1897 | } | 1862 | } |
| 1898 | 1863 | ||
| 1899 | Expression EndPrimitive(Operation operation) { | 1864 | Expression EndPrimitive(Operation operation) { |
| 1900 | ASSERT_MSG(stage == ProgramType::Geometry, | 1865 | ASSERT_MSG(stage == ShaderType::Geometry, |
| 1901 | "EndPrimitive is expected to be used in a geometry shader."); | 1866 | "EndPrimitive is expected to be used in a geometry shader."); |
| 1902 | code.AddLine("EndPrimitive();"); | 1867 | code.AddLine("EndPrimitive();"); |
| 1903 | return {}; | 1868 | return {}; |
| @@ -2193,10 +2158,6 @@ private: | |||
| 2193 | return "lmem_" + suffix; | 2158 | return "lmem_" + suffix; |
| 2194 | } | 2159 | } |
| 2195 | 2160 | ||
| 2196 | std::string GetSharedMemory() const { | ||
| 2197 | return fmt::format("smem_{}", suffix); | ||
| 2198 | } | ||
| 2199 | |||
| 2200 | std::string GetInternalFlag(InternalFlag flag) const { | 2161 | std::string GetInternalFlag(InternalFlag flag) const { |
| 2201 | constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", | 2162 | constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag", |
| 2202 | "overflow_flag"}; | 2163 | "overflow_flag"}; |
| @@ -2214,10 +2175,6 @@ private: | |||
| 2214 | return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image"); | 2175 | return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image"); |
| 2215 | } | 2176 | } |
| 2216 | 2177 | ||
| 2217 | void EmitIfdefIsBuffer(const Sampler& sampler) { | ||
| 2218 | code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex()); | ||
| 2219 | } | ||
| 2220 | |||
| 2221 | std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { | 2178 | std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { |
| 2222 | return fmt::format("{}_{}_{}", name, index, suffix); | 2179 | return fmt::format("{}_{}_{}", name, index, suffix); |
| 2223 | } | 2180 | } |
| @@ -2236,7 +2193,7 @@ private: | |||
| 2236 | 2193 | ||
| 2237 | const Device& device; | 2194 | const Device& device; |
| 2238 | const ShaderIR& ir; | 2195 | const ShaderIR& ir; |
| 2239 | const ProgramType stage; | 2196 | const ShaderType stage; |
| 2240 | const std::string suffix; | 2197 | const std::string suffix; |
| 2241 | const Header header; | 2198 | const Header header; |
| 2242 | 2199 | ||
| @@ -2491,7 +2448,7 @@ const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); | |||
| 2491 | )"; | 2448 | )"; |
| 2492 | } | 2449 | } |
| 2493 | 2450 | ||
| 2494 | std::string Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, | 2451 | std::string Decompile(const Device& device, const ShaderIR& ir, ShaderType stage, |
| 2495 | const std::string& suffix) { | 2452 | const std::string& suffix) { |
| 2496 | GLSLDecompiler decompiler(device, ir, stage, suffix); | 2453 | GLSLDecompiler decompiler(device, ir, stage, suffix); |
| 2497 | decompiler.Decompile(); | 2454 | decompiler.Decompile(); |
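Worth noting in the hunk above: the texture-buffer handling no longer emits two expression variants behind a SAMPLER_N_IS_BUFFER #ifdef; because the sampler type is now known at decompile time, the lod argument is simply skipped when meta->sampler.IsBuffer(). The toy sketch below illustrates that branch only; the hunk does not show which GLSL builtin is being assembled, so "texelFetch" and "sampler0" are stand-in names, not the decompiler's actual output.

    #include <string>

    #include <fmt/format.h>

    // Toy illustration: a buffer sampler takes no lod argument, so the lod is
    // appended only for non-buffer samplers instead of emitting both forms
    // behind a preprocessor toggle.
    std::string BuildFetchExpression(bool is_buffer, const std::string& coords, int lod) {
        std::string expr = fmt::format("texelFetch(sampler0, {}", coords);
        if (!is_buffer) {
            expr += fmt::format(", {}", lod);
        }
        expr += ')';
        return expr;
    }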
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index b1e75e6cc..7876f48d6 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/engines/shader_type.h" | ||
| 13 | #include "video_core/shader/shader_ir.h" | 14 | #include "video_core/shader/shader_ir.h" |
| 14 | 15 | ||
| 15 | namespace VideoCommon::Shader { | 16 | namespace VideoCommon::Shader { |
| @@ -17,20 +18,8 @@ class ShaderIR; | |||
| 17 | } | 18 | } |
| 18 | 19 | ||
| 19 | namespace OpenGL { | 20 | namespace OpenGL { |
| 20 | |||
| 21 | class Device; | 21 | class Device; |
| 22 | 22 | } | |
| 23 | enum class ProgramType : u32 { | ||
| 24 | VertexA = 0, | ||
| 25 | VertexB = 1, | ||
| 26 | TessellationControl = 2, | ||
| 27 | TessellationEval = 3, | ||
| 28 | Geometry = 4, | ||
| 29 | Fragment = 5, | ||
| 30 | Compute = 6 | ||
| 31 | }; | ||
| 32 | |||
| 33 | } // namespace OpenGL | ||
| 34 | 23 | ||
| 35 | namespace OpenGL::GLShader { | 24 | namespace OpenGL::GLShader { |
| 36 | 25 | ||
| @@ -94,6 +83,6 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir); | |||
| 94 | std::string GetCommonDeclarations(); | 83 | std::string GetCommonDeclarations(); |
| 95 | 84 | ||
| 96 | std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | 85 | std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 97 | ProgramType stage, const std::string& suffix); | 86 | Tegra::Engines::ShaderType stage, const std::string& suffix); |
| 98 | 87 | ||
| 99 | } // namespace OpenGL::GLShader | 88 | } // namespace OpenGL::GLShader |
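With the OpenGL-specific ProgramType enum removed from this header, the decompiler entry point is now called with the engine-level Tegra::Engines::ShaderType. A minimal sketch of a call site under the new signature; the device and IR objects are assumed to be prepared by the shader cache, and the "vertex" suffix matches what gl_shader_gen.cpp passes further below.

    #include <string>

    #include "video_core/engines/shader_type.h"
    #include "video_core/renderer_opengl/gl_shader_decompiler.h"

    // Sketch only: decompiling a vertex program with the unified enum. The
    // VertexA/VertexB split that ProgramType used to encode is now handled by
    // the caller, which decompiles both programs as ShaderType::Vertex.
    std::string DecompileVertex(const OpenGL::Device& device,
                                const VideoCommon::Shader::ShaderIR& ir) {
        return OpenGL::GLShader::Decompile(device, ir, Tegra::Engines::ShaderType::Vertex,
                                           "vertex");
    }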
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 184a565e6..cf874a09a 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cstring> | 5 | #include <cstring> |
| 6 | |||
| 6 | #include <fmt/format.h> | 7 | #include <fmt/format.h> |
| 7 | 8 | ||
| 8 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| @@ -12,50 +13,50 @@ | |||
| 12 | #include "common/logging/log.h" | 13 | #include "common/logging/log.h" |
| 13 | #include "common/scm_rev.h" | 14 | #include "common/scm_rev.h" |
| 14 | #include "common/zstd_compression.h" | 15 | #include "common/zstd_compression.h" |
| 15 | |||
| 16 | #include "core/core.h" | 16 | #include "core/core.h" |
| 17 | #include "core/hle/kernel/process.h" | 17 | #include "core/hle/kernel/process.h" |
| 18 | #include "core/settings.h" | 18 | #include "core/settings.h" |
| 19 | 19 | #include "video_core/engines/shader_type.h" | |
| 20 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 20 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 21 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | 21 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" |
| 22 | 22 | ||
| 23 | namespace OpenGL { | 23 | namespace OpenGL { |
| 24 | 24 | ||
| 25 | using Tegra::Engines::ShaderType; | ||
| 25 | using VideoCommon::Shader::BindlessSamplerMap; | 26 | using VideoCommon::Shader::BindlessSamplerMap; |
| 26 | using VideoCommon::Shader::BoundSamplerMap; | 27 | using VideoCommon::Shader::BoundSamplerMap; |
| 27 | using VideoCommon::Shader::KeyMap; | 28 | using VideoCommon::Shader::KeyMap; |
| 28 | 29 | ||
| 29 | namespace { | 30 | namespace { |
| 30 | 31 | ||
| 32 | using ShaderCacheVersionHash = std::array<u8, 64>; | ||
| 33 | |||
| 34 | enum class TransferableEntryKind : u32 { | ||
| 35 | Raw, | ||
| 36 | Usage, | ||
| 37 | }; | ||
| 38 | |||
| 31 | struct ConstBufferKey { | 39 | struct ConstBufferKey { |
| 32 | u32 cbuf; | 40 | u32 cbuf{}; |
| 33 | u32 offset; | 41 | u32 offset{}; |
| 34 | u32 value; | 42 | u32 value{}; |
| 35 | }; | 43 | }; |
| 36 | 44 | ||
| 37 | struct BoundSamplerKey { | 45 | struct BoundSamplerKey { |
| 38 | u32 offset; | 46 | u32 offset{}; |
| 39 | Tegra::Engines::SamplerDescriptor sampler; | 47 | Tegra::Engines::SamplerDescriptor sampler{}; |
| 40 | }; | 48 | }; |
| 41 | 49 | ||
| 42 | struct BindlessSamplerKey { | 50 | struct BindlessSamplerKey { |
| 43 | u32 cbuf; | 51 | u32 cbuf{}; |
| 44 | u32 offset; | 52 | u32 offset{}; |
| 45 | Tegra::Engines::SamplerDescriptor sampler; | 53 | Tegra::Engines::SamplerDescriptor sampler{}; |
| 46 | }; | ||
| 47 | |||
| 48 | using ShaderCacheVersionHash = std::array<u8, 64>; | ||
| 49 | |||
| 50 | enum class TransferableEntryKind : u32 { | ||
| 51 | Raw, | ||
| 52 | Usage, | ||
| 53 | }; | 54 | }; |
| 54 | 55 | ||
| 55 | constexpr u32 NativeVersion = 5; | 56 | constexpr u32 NativeVersion = 11; |
| 56 | 57 | ||
| 57 | // Making sure sizes doesn't change by accident | 58 | // Making sure sizes doesn't change by accident |
| 58 | static_assert(sizeof(BaseBindings) == 16); | 59 | static_assert(sizeof(ProgramVariant) == 20); |
| 59 | 60 | ||
| 60 | ShaderCacheVersionHash GetShaderCacheVersionHash() { | 61 | ShaderCacheVersionHash GetShaderCacheVersionHash() { |
| 61 | ShaderCacheVersionHash hash{}; | 62 | ShaderCacheVersionHash hash{}; |
| @@ -66,10 +67,10 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { | |||
| 66 | 67 | ||
| 67 | } // Anonymous namespace | 68 | } // Anonymous namespace |
| 68 | 69 | ||
| 69 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, | 70 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ShaderType type, ProgramCode code, |
| 70 | ProgramCode program_code, ProgramCode program_code_b) | 71 | ProgramCode code_b) |
| 71 | : unique_identifier{unique_identifier}, program_type{program_type}, | 72 | : unique_identifier{unique_identifier}, type{type}, code{std::move(code)}, code_b{std::move( |
| 72 | program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {} | 73 | code_b)} {} |
| 73 | 74 | ||
| 74 | ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; | 75 | ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; |
| 75 | 76 | ||
| @@ -77,42 +78,39 @@ ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default; | |||
| 77 | 78 | ||
| 78 | bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) { | 79 | bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) { |
| 79 | if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) || | 80 | if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) || |
| 80 | file.ReadBytes(&program_type, sizeof(u32)) != sizeof(u32)) { | 81 | file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) { |
| 81 | return false; | 82 | return false; |
| 82 | } | 83 | } |
| 83 | u32 program_code_size{}; | 84 | u32 code_size{}; |
| 84 | u32 program_code_size_b{}; | 85 | u32 code_size_b{}; |
| 85 | if (file.ReadBytes(&program_code_size, sizeof(u32)) != sizeof(u32) || | 86 | if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) || |
| 86 | file.ReadBytes(&program_code_size_b, sizeof(u32)) != sizeof(u32)) { | 87 | file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) { |
| 87 | return false; | 88 | return false; |
| 88 | } | 89 | } |
| 89 | 90 | ||
| 90 | program_code.resize(program_code_size); | 91 | code.resize(code_size); |
| 91 | program_code_b.resize(program_code_size_b); | 92 | code_b.resize(code_size_b); |
| 92 | 93 | ||
| 93 | if (file.ReadArray(program_code.data(), program_code_size) != program_code_size) | 94 | if (file.ReadArray(code.data(), code_size) != code_size) |
| 94 | return false; | 95 | return false; |
| 95 | 96 | ||
| 96 | if (HasProgramA() && | 97 | if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) { |
| 97 | file.ReadArray(program_code_b.data(), program_code_size_b) != program_code_size_b) { | ||
| 98 | return false; | 98 | return false; |
| 99 | } | 99 | } |
| 100 | return true; | 100 | return true; |
| 101 | } | 101 | } |
| 102 | 102 | ||
| 103 | bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { | 103 | bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { |
| 104 | if (file.WriteObject(unique_identifier) != 1 || | 104 | if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast<u32>(type)) != 1 || |
| 105 | file.WriteObject(static_cast<u32>(program_type)) != 1 || | 105 | file.WriteObject(static_cast<u32>(code.size())) != 1 || |
| 106 | file.WriteObject(static_cast<u32>(program_code.size())) != 1 || | 106 | file.WriteObject(static_cast<u32>(code_b.size())) != 1) { |
| 107 | file.WriteObject(static_cast<u32>(program_code_b.size())) != 1) { | ||
| 108 | return false; | 107 | return false; |
| 109 | } | 108 | } |
| 110 | 109 | ||
| 111 | if (file.WriteArray(program_code.data(), program_code.size()) != program_code.size()) | 110 | if (file.WriteArray(code.data(), code.size()) != code.size()) |
| 112 | return false; | 111 | return false; |
| 113 | 112 | ||
| 114 | if (HasProgramA() && | 113 | if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) { |
| 115 | file.WriteArray(program_code_b.data(), program_code_b.size()) != program_code_b.size()) { | ||
| 116 | return false; | 114 | return false; |
| 117 | } | 115 | } |
| 118 | return true; | 116 | return true; |
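Because the raw entry now serializes type/code/code_b instead of program_type/program_code/program_code_b, the transferable file format changes, which is why NativeVersion jumps from 5 to 11 earlier in this hunk. The sketch below mirrors the record layout that Save() appears to emit; it assumes IOFile's ReadArray/WriteArray take element counts of the u64 program words and that no padding or endianness handling is involved.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Hypothetical standalone reader for one raw entry (names are illustrative;
    // the real reader is ShaderDiskCacheRaw::Load above).
    struct RawEntry {
        std::uint64_t unique_identifier{};
        std::uint32_t type{}; // Tegra::Engines::ShaderType stored as u32
        std::vector<std::uint64_t> code;
        std::vector<std::uint64_t> code_b;
    };

    bool ReadRawEntry(std::FILE* file, RawEntry& out) {
        std::uint32_t code_size{};
        std::uint32_t code_size_b{};
        if (std::fread(&out.unique_identifier, sizeof out.unique_identifier, 1, file) != 1 ||
            std::fread(&out.type, sizeof out.type, 1, file) != 1 ||
            std::fread(&code_size, sizeof code_size, 1, file) != 1 ||
            std::fread(&code_size_b, sizeof code_size_b, 1, file) != 1) {
            return false;
        }
        out.code.resize(code_size);
        out.code_b.resize(code_size_b);
        if (std::fread(out.code.data(), sizeof(std::uint64_t), code_size, file) != code_size) {
            return false;
        }
        // code_b is only present when both VertexA and VertexB programs were cached.
        if (code_size != 0 && code_size_b != 0 &&
            std::fread(out.code_b.data(), sizeof(std::uint64_t), code_size_b, file) !=
                code_size_b) {
            return false;
        }
        return true;
    }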
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index db23ada93..69a2fbdda 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -4,7 +4,6 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <bitset> | ||
| 8 | #include <optional> | 7 | #include <optional> |
| 9 | #include <string> | 8 | #include <string> |
| 10 | #include <tuple> | 9 | #include <tuple> |
| @@ -19,6 +18,7 @@ | |||
| 19 | #include "common/assert.h" | 18 | #include "common/assert.h" |
| 20 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 21 | #include "core/file_sys/vfs_vector.h" | 20 | #include "core/file_sys/vfs_vector.h" |
| 21 | #include "video_core/engines/shader_type.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 22 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| 23 | #include "video_core/shader/const_buffer_locker.h" | 23 | #include "video_core/shader/const_buffer_locker.h" |
| 24 | 24 | ||
| @@ -37,42 +37,42 @@ struct ShaderDiskCacheDump; | |||
| 37 | 37 | ||
| 38 | using ProgramCode = std::vector<u64>; | 38 | using ProgramCode = std::vector<u64>; |
| 39 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; | 39 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; |
| 40 | using TextureBufferUsage = std::bitset<64>; | ||
| 41 | |||
| 42 | /// Allocated bindings used by an OpenGL shader program | ||
| 43 | struct BaseBindings { | ||
| 44 | u32 cbuf{}; | ||
| 45 | u32 gmem{}; | ||
| 46 | u32 sampler{}; | ||
| 47 | u32 image{}; | ||
| 48 | |||
| 49 | bool operator==(const BaseBindings& rhs) const { | ||
| 50 | return std::tie(cbuf, gmem, sampler, image) == | ||
| 51 | std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image); | ||
| 52 | } | ||
| 53 | 40 | ||
| 54 | bool operator!=(const BaseBindings& rhs) const { | 41 | /// Describes the different variants a program can be compiled with. |
| 55 | return !operator==(rhs); | 42 | struct ProgramVariant final { |
| 56 | } | 43 | ProgramVariant() = default; |
| 57 | }; | 44 | |
| 58 | static_assert(std::is_trivially_copyable_v<BaseBindings>); | 45 | /// Graphics constructor. |
| 46 | explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept | ||
| 47 | : primitive_mode{primitive_mode} {} | ||
| 48 | |||
| 49 | /// Compute constructor. | ||
| 50 | explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size, | ||
| 51 | u32 local_memory_size) noexcept | ||
| 52 | : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)}, | ||
| 53 | shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {} | ||
| 59 | 54 | ||
| 60 | /// Describes the different variants a single program can be compiled. | 55 | // Graphics specific parameters. |
| 61 | struct ProgramVariant { | ||
| 62 | BaseBindings base_bindings; | ||
| 63 | GLenum primitive_mode{}; | 56 | GLenum primitive_mode{}; |
| 64 | TextureBufferUsage texture_buffer_usage{}; | ||
| 65 | 57 | ||
| 66 | bool operator==(const ProgramVariant& rhs) const { | 58 | // Compute specific parameters. |
| 67 | return std::tie(base_bindings, primitive_mode, texture_buffer_usage) == | 59 | u32 block_x{}; |
| 68 | std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.texture_buffer_usage); | 60 | u16 block_y{}; |
| 61 | u16 block_z{}; | ||
| 62 | u32 shared_memory_size{}; | ||
| 63 | u32 local_memory_size{}; | ||
| 64 | |||
| 65 | bool operator==(const ProgramVariant& rhs) const noexcept { | ||
| 66 | return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size, | ||
| 67 | local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y, | ||
| 68 | rhs.block_z, rhs.shared_memory_size, | ||
| 69 | rhs.local_memory_size); | ||
| 69 | } | 70 | } |
| 70 | 71 | ||
| 71 | bool operator!=(const ProgramVariant& rhs) const { | 72 | bool operator!=(const ProgramVariant& rhs) const noexcept { |
| 72 | return !operator==(rhs); | 73 | return !operator==(rhs); |
| 73 | } | 74 | } |
| 74 | }; | 75 | }; |
| 75 | |||
| 76 | static_assert(std::is_trivially_copyable_v<ProgramVariant>); | 76 | static_assert(std::is_trivially_copyable_v<ProgramVariant>); |
| 77 | 77 | ||
| 78 | /// Describes how a shader is used. | 78 | /// Describes how a shader is used. |
| @@ -99,21 +99,14 @@ struct ShaderDiskCacheUsage { | |||
| 99 | namespace std { | 99 | namespace std { |
| 100 | 100 | ||
| 101 | template <> | 101 | template <> |
| 102 | struct hash<OpenGL::BaseBindings> { | ||
| 103 | std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept { | ||
| 104 | return static_cast<std::size_t>(bindings.cbuf) ^ | ||
| 105 | (static_cast<std::size_t>(bindings.gmem) << 8) ^ | ||
| 106 | (static_cast<std::size_t>(bindings.sampler) << 16) ^ | ||
| 107 | (static_cast<std::size_t>(bindings.image) << 24); | ||
| 108 | } | ||
| 109 | }; | ||
| 110 | |||
| 111 | template <> | ||
| 112 | struct hash<OpenGL::ProgramVariant> { | 102 | struct hash<OpenGL::ProgramVariant> { |
| 113 | std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { | 103 | std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { |
| 114 | return std::hash<OpenGL::BaseBindings>()(variant.base_bindings) ^ | 104 | return (static_cast<std::size_t>(variant.primitive_mode) << 6) ^ |
| 115 | std::hash<OpenGL::TextureBufferUsage>()(variant.texture_buffer_usage) ^ | 105 | static_cast<std::size_t>(variant.block_x) ^ |
| 116 | (static_cast<std::size_t>(variant.primitive_mode) << 6); | 106 | (static_cast<std::size_t>(variant.block_y) << 32) ^ |
| 107 | (static_cast<std::size_t>(variant.block_z) << 48) ^ | ||
| 108 | (static_cast<std::size_t>(variant.shared_memory_size) << 16) ^ | ||
| 109 | (static_cast<std::size_t>(variant.local_memory_size) << 36); | ||
| 117 | } | 110 | } |
| 118 | }; | 111 | }; |
| 119 | 112 | ||
| @@ -121,7 +114,7 @@ template <> | |||
| 121 | struct hash<OpenGL::ShaderDiskCacheUsage> { | 114 | struct hash<OpenGL::ShaderDiskCacheUsage> { |
| 122 | std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { | 115 | std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { |
| 123 | return static_cast<std::size_t>(usage.unique_identifier) ^ | 116 | return static_cast<std::size_t>(usage.unique_identifier) ^ |
| 124 | std::hash<OpenGL::ProgramVariant>()(usage.variant); | 117 | std::hash<OpenGL::ProgramVariant>{}(usage.variant); |
| 125 | } | 118 | } |
| 126 | }; | 119 | }; |
| 127 | 120 | ||
| @@ -132,8 +125,8 @@ namespace OpenGL { | |||
| 132 | /// Describes a shader how it's used by the guest GPU | 125 | /// Describes a shader how it's used by the guest GPU |
| 133 | class ShaderDiskCacheRaw { | 126 | class ShaderDiskCacheRaw { |
| 134 | public: | 127 | public: |
| 135 | explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, | 128 | explicit ShaderDiskCacheRaw(u64 unique_identifier, Tegra::Engines::ShaderType type, |
| 136 | ProgramCode program_code, ProgramCode program_code_b = {}); | 129 | ProgramCode code, ProgramCode code_b = {}); |
| 137 | ShaderDiskCacheRaw(); | 130 | ShaderDiskCacheRaw(); |
| 138 | ~ShaderDiskCacheRaw(); | 131 | ~ShaderDiskCacheRaw(); |
| 139 | 132 | ||
| @@ -146,27 +139,26 @@ public: | |||
| 146 | } | 139 | } |
| 147 | 140 | ||
| 148 | bool HasProgramA() const { | 141 | bool HasProgramA() const { |
| 149 | return program_type == ProgramType::VertexA; | 142 | return !code.empty() && !code_b.empty(); |
| 150 | } | 143 | } |
| 151 | 144 | ||
| 152 | ProgramType GetProgramType() const { | 145 | Tegra::Engines::ShaderType GetType() const { |
| 153 | return program_type; | 146 | return type; |
| 154 | } | 147 | } |
| 155 | 148 | ||
| 156 | const ProgramCode& GetProgramCode() const { | 149 | const ProgramCode& GetCode() const { |
| 157 | return program_code; | 150 | return code; |
| 158 | } | 151 | } |
| 159 | 152 | ||
| 160 | const ProgramCode& GetProgramCodeB() const { | 153 | const ProgramCode& GetCodeB() const { |
| 161 | return program_code_b; | 154 | return code_b; |
| 162 | } | 155 | } |
| 163 | 156 | ||
| 164 | private: | 157 | private: |
| 165 | u64 unique_identifier{}; | 158 | u64 unique_identifier{}; |
| 166 | ProgramType program_type{}; | 159 | Tegra::Engines::ShaderType type{}; |
| 167 | 160 | ProgramCode code; | |
| 168 | ProgramCode program_code; | 161 | ProgramCode code_b; |
| 169 | ProgramCode program_code_b; | ||
| 170 | }; | 162 | }; |
| 171 | 163 | ||
| 172 | /// Contains an OpenGL dumped binary program | 164 | /// Contains an OpenGL dumped binary program |
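ProgramVariant now carries either a graphics key (just the primitive topology) or a compute key (workgroup dimensions plus shared and local memory sizes), and the std::hash specialization XOR-folds those fields directly instead of going through BaseBindings. A short usage sketch of the two constructors; the concrete values are illustrative only.

    #include <cstddef>
    #include <functional>

    #include <glad/glad.h>

    #include "video_core/renderer_opengl/gl_shader_disk_cache.h"

    void VariantExample() {
        // Graphics: keyed only on the primitive topology now that base bindings
        // and texture buffer usage no longer influence the generated program.
        const OpenGL::ProgramVariant graphics(GL_TRIANGLES);

        // Compute: keyed on the workgroup dimensions plus memory requirements.
        const OpenGL::ProgramVariant compute(/*block_x=*/8, /*block_y=*/8, /*block_z=*/1,
                                             /*shared_memory_size=*/0x1000,
                                             /*local_memory_size=*/0x200);

        const std::size_t key = std::hash<OpenGL::ProgramVariant>{}(graphics);
        (void)key;
        (void)compute;
    }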
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index a63c1a6b8..34946fb47 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -2,8 +2,13 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <string> | ||
| 6 | |||
| 5 | #include <fmt/format.h> | 7 | #include <fmt/format.h> |
| 8 | |||
| 6 | #include "video_core/engines/maxwell_3d.h" | 9 | #include "video_core/engines/maxwell_3d.h" |
| 10 | #include "video_core/engines/shader_type.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 7 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 12 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 8 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 13 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| 9 | #include "video_core/shader/shader_ir.h" | 14 | #include "video_core/shader/shader_ir.h" |
| @@ -11,6 +16,7 @@ | |||
| 11 | namespace OpenGL::GLShader { | 16 | namespace OpenGL::GLShader { |
| 12 | 17 | ||
| 13 | using Tegra::Engines::Maxwell3D; | 18 | using Tegra::Engines::Maxwell3D; |
| 19 | using Tegra::Engines::ShaderType; | ||
| 14 | using VideoCommon::Shader::CompileDepth; | 20 | using VideoCommon::Shader::CompileDepth; |
| 15 | using VideoCommon::Shader::CompilerSettings; | 21 | using VideoCommon::Shader::CompilerSettings; |
| 16 | using VideoCommon::Shader::ProgramCode; | 22 | using VideoCommon::Shader::ProgramCode; |
| @@ -18,16 +24,16 @@ using VideoCommon::Shader::ShaderIR; | |||
| 18 | 24 | ||
| 19 | std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { | 25 | std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { |
| 20 | std::string out = GetCommonDeclarations(); | 26 | std::string out = GetCommonDeclarations(); |
| 21 | out += R"( | 27 | out += fmt::format(R"( |
| 22 | layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | 28 | layout (std140, binding = {}) uniform vs_config {{ |
| 23 | float y_direction; | 29 | float y_direction; |
| 24 | }; | 30 | }}; |
| 25 | 31 | ||
| 26 | )"; | 32 | )", |
| 27 | const auto stage = ir_b ? ProgramType::VertexA : ProgramType::VertexB; | 33 | EmulationUniformBlockBinding); |
| 28 | out += Decompile(device, ir, stage, "vertex"); | 34 | out += Decompile(device, ir, ShaderType::Vertex, "vertex"); |
| 29 | if (ir_b) { | 35 | if (ir_b) { |
| 30 | out += Decompile(device, *ir_b, ProgramType::VertexB, "vertex_b"); | 36 | out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b"); |
| 31 | } | 37 | } |
| 32 | 38 | ||
| 33 | out += R"( | 39 | out += R"( |
| @@ -44,13 +50,14 @@ void main() { | |||
| 44 | 50 | ||
| 45 | std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { | 51 | std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { |
| 46 | std::string out = GetCommonDeclarations(); | 52 | std::string out = GetCommonDeclarations(); |
| 47 | out += R"( | 53 | out += fmt::format(R"( |
| 48 | layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | 54 | layout (std140, binding = {}) uniform gs_config {{ |
| 49 | float y_direction; | 55 | float y_direction; |
| 50 | }; | 56 | }}; |
| 51 | 57 | ||
| 52 | )"; | 58 | )", |
| 53 | out += Decompile(device, ir, ProgramType::Geometry, "geometry"); | 59 | EmulationUniformBlockBinding); |
| 60 | out += Decompile(device, ir, ShaderType::Geometry, "geometry"); | ||
| 54 | 61 | ||
| 55 | out += R"( | 62 | out += R"( |
| 56 | void main() { | 63 | void main() { |
| @@ -62,7 +69,7 @@ void main() { | |||
| 62 | 69 | ||
| 63 | std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { | 70 | std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { |
| 64 | std::string out = GetCommonDeclarations(); | 71 | std::string out = GetCommonDeclarations(); |
| 65 | out += R"( | 72 | out += fmt::format(R"( |
| 66 | layout (location = 0) out vec4 FragColor0; | 73 | layout (location = 0) out vec4 FragColor0; |
| 67 | layout (location = 1) out vec4 FragColor1; | 74 | layout (location = 1) out vec4 FragColor1; |
| 68 | layout (location = 2) out vec4 FragColor2; | 75 | layout (location = 2) out vec4 FragColor2; |
| @@ -72,12 +79,13 @@ layout (location = 5) out vec4 FragColor5; | |||
| 72 | layout (location = 6) out vec4 FragColor6; | 79 | layout (location = 6) out vec4 FragColor6; |
| 73 | layout (location = 7) out vec4 FragColor7; | 80 | layout (location = 7) out vec4 FragColor7; |
| 74 | 81 | ||
| 75 | layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { | 82 | layout (std140, binding = {}) uniform fs_config {{ |
| 76 | float y_direction; | 83 | float y_direction; |
| 77 | }; | 84 | }}; |
| 78 | 85 | ||
| 79 | )"; | 86 | )", |
| 80 | out += Decompile(device, ir, ProgramType::Fragment, "fragment"); | 87 | EmulationUniformBlockBinding); |
| 88 | out += Decompile(device, ir, ShaderType::Fragment, "fragment"); | ||
| 81 | 89 | ||
| 82 | out += R"( | 90 | out += R"( |
| 83 | void main() { | 91 | void main() { |
| @@ -89,7 +97,7 @@ void main() { | |||
| 89 | 97 | ||
| 90 | std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) { | 98 | std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) { |
| 91 | std::string out = GetCommonDeclarations(); | 99 | std::string out = GetCommonDeclarations(); |
| 92 | out += Decompile(device, ir, ProgramType::Compute, "compute"); | 100 | out += Decompile(device, ir, ShaderType::Compute, "compute"); |
| 93 | out += R"( | 101 | out += R"( |
| 94 | void main() { | 102 | void main() { |
| 95 | execute_compute(); | 103 | execute_compute(); |
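The generated preambles switch from a textual EMULATION_UBO_BINDING macro to a binding number substituted at generation time through fmt, which is why the braces in the raw strings are doubled. A minimal sketch of what that substitution produces, assuming the EmulationUniformBlockBinding constant (defined elsewhere, not shown in this diff) is 0.

    #include <iostream>
    #include <string>

    #include <fmt/format.h>

    int main() {
        // The real constant comes from the decompiler header; 0 is assumed here.
        constexpr unsigned EmulationUniformBlockBinding = 0;
        const std::string preamble = fmt::format(R"(
    layout (std140, binding = {}) uniform vs_config {{
        float y_direction;
    }};
    )",
                                                 EmulationUniformBlockBinding);
        // Prints the block with "binding = 0" substituted and the doubled
        // braces collapsed to literal ones.
        std::cout << preamble;
    }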
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index ccbe5912e..39b3986d3 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -417,14 +417,24 @@ void OpenGLState::ApplyClipControl() { | |||
| 417 | } | 417 | } |
| 418 | 418 | ||
| 419 | void OpenGLState::ApplyTextures() { | 419 | void OpenGLState::ApplyTextures() { |
| 420 | if (const auto update = UpdateArray(cur_state.textures, textures)) { | 420 | const std::size_t size = std::size(textures); |
| 421 | glBindTextures(update->first, update->second, textures.data() + update->first); | 421 | for (std::size_t i = 0; i < size; ++i) { |
| 422 | if (UpdateValue(cur_state.textures[i], textures[i])) { | ||
| 423 | // BindTextureUnit doesn't support binding null textures, skip those binds. | ||
| 424 | // TODO(Rodrigo): Stop using null textures | ||
| 425 | if (textures[i] != 0) { | ||
| 426 | glBindTextureUnit(static_cast<GLuint>(i), textures[i]); | ||
| 427 | } | ||
| 428 | } | ||
| 422 | } | 429 | } |
| 423 | } | 430 | } |
| 424 | 431 | ||
| 425 | void OpenGLState::ApplySamplers() { | 432 | void OpenGLState::ApplySamplers() { |
| 426 | if (const auto update = UpdateArray(cur_state.samplers, samplers)) { | 433 | const std::size_t size = std::size(samplers); |
| 427 | glBindSamplers(update->first, update->second, samplers.data() + update->first); | 434 | for (std::size_t i = 0; i < size; ++i) { |
| 435 | if (UpdateValue(cur_state.samplers[i], samplers[i])) { | ||
| 436 | glBindSampler(static_cast<GLuint>(i), samplers[i]); | ||
| 437 | } | ||
| 428 | } | 438 | } |
| 429 | } | 439 | } |
| 430 | 440 | ||
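ApplyTextures and ApplySamplers stop using the batched glBindTextures/glBindSamplers multi-bind path and instead issue one DSA bind per dirty unit, skipping null textures for the reason given in the comment. The loops lean on an UpdateValue helper defined elsewhere in this file; a presumed minimal equivalent, an assumption rather than the emulator's exact code, would be:

    // Returns true when the shadowed value differed from the requested one,
    // updating the shadow state either way.
    template <typename T>
    bool UpdateValue(T& current, const T& new_value) {
        const bool changed = current != new_value;
        current = new_value;
        return changed;
    }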
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index eaff22bda..e53c2c5f2 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h | |||
| @@ -96,9 +96,11 @@ public: | |||
| 96 | GLenum operation = GL_COPY; | 96 | GLenum operation = GL_COPY; |
| 97 | } logic_op; | 97 | } logic_op; |
| 98 | 98 | ||
| 99 | std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures = {}; | 99 | static constexpr std::size_t NumSamplers = 32 * 5; |
| 100 | std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers = {}; | 100 | static constexpr std::size_t NumImages = 8 * 5; |
| 101 | std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images = {}; | 101 | std::array<GLuint, NumSamplers> textures = {}; |
| 102 | std::array<GLuint, NumSamplers> samplers = {}; | ||
| 103 | std::array<GLuint, NumImages> images = {}; | ||
| 102 | 104 | ||
| 103 | struct { | 105 | struct { |
| 104 | GLuint read_framebuffer = 0; // GL_READ_FRAMEBUFFER_BINDING | 106 | GLuint read_framebuffer = 0; // GL_READ_FRAMEBUFFER_BINDING |
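The sampler and image arrays are no longer sized from the Maxwell3D register constants; the new counts read as per-stage limits multiplied by the five graphics stages. Two static_asserts spelling out that interpretation (the 32-per-stage and 8-per-stage split is an inference from the literals, not stated in the diff):

    #include "video_core/renderer_opengl/gl_state.h"

    static_assert(OpenGL::OpenGLState::NumSamplers == 32 * 5);
    static_assert(OpenGL::OpenGLState::NumImages == 8 * 5);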
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index c504a2c1a..9770dda1c 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp | |||
| @@ -3,7 +3,10 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <string> | 5 | #include <string> |
| 6 | #include <vector> | ||
| 7 | |||
| 6 | #include <fmt/format.h> | 8 | #include <fmt/format.h> |
| 9 | |||
| 7 | #include <glad/glad.h> | 10 | #include <glad/glad.h> |
| 8 | 11 | ||
| 9 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| @@ -48,34 +51,19 @@ BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{t | |||
| 48 | 51 | ||
| 49 | BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; | 52 | BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; |
| 50 | 53 | ||
| 51 | void BindBuffersRangePushBuffer::Setup(GLuint first_) { | 54 | void BindBuffersRangePushBuffer::Setup() { |
| 52 | first = first_; | 55 | entries.clear(); |
| 53 | buffer_pointers.clear(); | ||
| 54 | offsets.clear(); | ||
| 55 | sizes.clear(); | ||
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) { | 58 | void BindBuffersRangePushBuffer::Push(GLuint binding, const GLuint* buffer, GLintptr offset, |
| 59 | buffer_pointers.push_back(buffer); | 59 | GLsizeiptr size) { |
| 60 | offsets.push_back(offset); | 60 | entries.push_back(Entry{binding, buffer, offset, size}); |
| 61 | sizes.push_back(size); | ||
| 62 | } | 61 | } |
| 63 | 62 | ||
| 64 | void BindBuffersRangePushBuffer::Bind() { | 63 | void BindBuffersRangePushBuffer::Bind() { |
| 65 | // Ensure sizes are valid. | 64 | for (const Entry& entry : entries) { |
| 66 | const std::size_t count{buffer_pointers.size()}; | 65 | glBindBufferRange(target, entry.binding, *entry.buffer, entry.offset, entry.size); |
| 67 | DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); | ||
| 68 | if (count == 0) { | ||
| 69 | return; | ||
| 70 | } | 66 | } |
| 71 | |||
| 72 | // Dereference buffers. | ||
| 73 | buffers.resize(count); | ||
| 74 | std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(), | ||
| 75 | [](const GLuint* pointer) { return *pointer; }); | ||
| 76 | |||
| 77 | glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), | ||
| 78 | sizes.data()); | ||
| 79 | } | 67 | } |
| 80 | 68 | ||
| 81 | void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { | 69 | void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { |
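BindBuffersRangePushBuffer now records an explicit binding index per entry and replays them with individual glBindBufferRange calls rather than one glBindBuffersRange batch; buffers are still passed by pointer so the handle can be resolved at Bind() time. An assumed usage sketch follows; the binding points and sizes are illustrative, and the OpenGL namespace is presumed from the rest of this file.

    #include <glad/glad.h>

    #include "video_core/renderer_opengl/utils.h"

    // 'ubo' is a hypothetical, already-created GL buffer whose handle may still
    // change between Push() and Bind().
    void BindExampleRanges(OpenGL::BindBuffersRangePushBuffer& push_buffer, const GLuint& ubo) {
        push_buffer.Setup(); // no base binding parameter anymore
        push_buffer.Push(/*binding=*/3, &ubo, /*offset=*/0, /*size=*/0x100);
        push_buffer.Push(/*binding=*/4, &ubo, /*offset=*/0x100, /*size=*/0x100);
        push_buffer.Bind(); // issues one glBindBufferRange per pushed entry
    }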
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 6c2b45546..d56153fe7 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h | |||
| @@ -43,20 +43,22 @@ public: | |||
| 43 | explicit BindBuffersRangePushBuffer(GLenum target); | 43 | explicit BindBuffersRangePushBuffer(GLenum target); |
| 44 | ~BindBuffersRangePushBuffer(); | 44 | ~BindBuffersRangePushBuffer(); |
| 45 | 45 | ||
| 46 | void Setup(GLuint first_); | 46 | void Setup(); |
| 47 | 47 | ||
| 48 | void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size); | 48 | void Push(GLuint binding, const GLuint* buffer, GLintptr offset, GLsizeiptr size); |
| 49 | 49 | ||
| 50 | void Bind(); | 50 | void Bind(); |
| 51 | 51 | ||
| 52 | private: | 52 | private: |
| 53 | GLenum target{}; | 53 | struct Entry { |
| 54 | GLuint first{}; | 54 | GLuint binding; |
| 55 | std::vector<const GLuint*> buffer_pointers; | 55 | const GLuint* buffer; |
| 56 | GLintptr offset; | ||
| 57 | GLsizeiptr size; | ||
| 58 | }; | ||
| 56 | 59 | ||
| 57 | std::vector<GLuint> buffers; | 60 | GLenum target; |
| 58 | std::vector<GLintptr> offsets; | 61 | std::vector<Entry> entries; |
| 59 | std::vector<GLsizeiptr> sizes; | ||
| 60 | }; | 62 | }; |
| 61 | 63 | ||
| 62 | void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); | 64 | void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 463ed43ae..7f0eb6b74 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -13,6 +13,8 @@ | |||
| 13 | 13 | ||
| 14 | namespace Vulkan::MaxwellToVK { | 14 | namespace Vulkan::MaxwellToVK { |
| 15 | 15 | ||
| 16 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 17 | |||
| 16 | namespace Sampler { | 18 | namespace Sampler { |
| 17 | 19 | ||
| 18 | vk::Filter Filter(Tegra::Texture::TextureFilter filter) { | 20 | vk::Filter Filter(Tegra::Texture::TextureFilter filter) { |
| @@ -196,17 +198,17 @@ std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType for | |||
| 196 | return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable}; | 198 | return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable}; |
| 197 | } | 199 | } |
| 198 | 200 | ||
| 199 | vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage) { | 201 | vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { |
| 200 | switch (stage) { | 202 | switch (stage) { |
| 201 | case Maxwell::ShaderStage::Vertex: | 203 | case Tegra::Engines::ShaderType::Vertex: |
| 202 | return vk::ShaderStageFlagBits::eVertex; | 204 | return vk::ShaderStageFlagBits::eVertex; |
| 203 | case Maxwell::ShaderStage::TesselationControl: | 205 | case Tegra::Engines::ShaderType::TesselationControl: |
| 204 | return vk::ShaderStageFlagBits::eTessellationControl; | 206 | return vk::ShaderStageFlagBits::eTessellationControl; |
| 205 | case Maxwell::ShaderStage::TesselationEval: | 207 | case Tegra::Engines::ShaderType::TesselationEval: |
| 206 | return vk::ShaderStageFlagBits::eTessellationEvaluation; | 208 | return vk::ShaderStageFlagBits::eTessellationEvaluation; |
| 207 | case Maxwell::ShaderStage::Geometry: | 209 | case Tegra::Engines::ShaderType::Geometry: |
| 208 | return vk::ShaderStageFlagBits::eGeometry; | 210 | return vk::ShaderStageFlagBits::eGeometry; |
| 209 | case Maxwell::ShaderStage::Fragment: | 211 | case Tegra::Engines::ShaderType::Fragment: |
| 210 | return vk::ShaderStageFlagBits::eFragment; | 212 | return vk::ShaderStageFlagBits::eFragment; |
| 211 | } | 213 | } |
| 212 | UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); | 214 | UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 5b0ffd87a..904a32e01 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h | |||
| @@ -32,7 +32,7 @@ vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compar | |||
| 32 | std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type, | 32 | std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type, |
| 33 | PixelFormat pixel_format); | 33 | PixelFormat pixel_format); |
| 34 | 34 | ||
| 35 | vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage); | 35 | vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); |
| 36 | 36 | ||
| 37 | vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology); | 37 | vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology); |
| 38 | 38 | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 2850d5b59..80738d3d0 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include "video_core/engines/maxwell_3d.h" | 17 | #include "video_core/engines/maxwell_3d.h" |
| 18 | #include "video_core/engines/shader_bytecode.h" | 18 | #include "video_core/engines/shader_bytecode.h" |
| 19 | #include "video_core/engines/shader_header.h" | 19 | #include "video_core/engines/shader_header.h" |
| 20 | #include "video_core/engines/shader_type.h" | ||
| 20 | #include "video_core/renderer_vulkan/vk_device.h" | 21 | #include "video_core/renderer_vulkan/vk_device.h" |
| 21 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 22 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 22 | #include "video_core/shader/node.h" | 23 | #include "video_core/shader/node.h" |
| @@ -25,13 +26,13 @@ | |||
| 25 | namespace Vulkan::VKShader { | 26 | namespace Vulkan::VKShader { |
| 26 | 27 | ||
| 27 | using Sirit::Id; | 28 | using Sirit::Id; |
| 29 | using Tegra::Engines::ShaderType; | ||
| 28 | using Tegra::Shader::Attribute; | 30 | using Tegra::Shader::Attribute; |
| 29 | using Tegra::Shader::AttributeUse; | 31 | using Tegra::Shader::AttributeUse; |
| 30 | using Tegra::Shader::Register; | 32 | using Tegra::Shader::Register; |
| 31 | using namespace VideoCommon::Shader; | 33 | using namespace VideoCommon::Shader; |
| 32 | 34 | ||
| 33 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 35 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 34 | using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; | ||
| 35 | using Operation = const OperationNode&; | 36 | using Operation = const OperationNode&; |
| 36 | 37 | ||
| 37 | // TODO(Rodrigo): Use rasterizer's value | 38 | // TODO(Rodrigo): Use rasterizer's value |
| @@ -93,7 +94,7 @@ class ExprDecompiler; | |||
| 93 | 94 | ||
| 94 | class SPIRVDecompiler : public Sirit::Module { | 95 | class SPIRVDecompiler : public Sirit::Module { |
| 95 | public: | 96 | public: |
| 96 | explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderStage stage) | 97 | explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage) |
| 97 | : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()} { | 98 | : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()} { |
| 98 | AddCapability(spv::Capability::Shader); | 99 | AddCapability(spv::Capability::Shader); |
| 99 | AddExtension("SPV_KHR_storage_buffer_storage_class"); | 100 | AddExtension("SPV_KHR_storage_buffer_storage_class"); |
| @@ -256,21 +257,21 @@ private: | |||
| 256 | } | 257 | } |
| 257 | 258 | ||
| 258 | void DeclareVertex() { | 259 | void DeclareVertex() { |
| 259 | if (stage != ShaderStage::Vertex) | 260 | if (stage != ShaderType::Vertex) |
| 260 | return; | 261 | return; |
| 261 | 262 | ||
| 262 | DeclareVertexRedeclarations(); | 263 | DeclareVertexRedeclarations(); |
| 263 | } | 264 | } |
| 264 | 265 | ||
| 265 | void DeclareGeometry() { | 266 | void DeclareGeometry() { |
| 266 | if (stage != ShaderStage::Geometry) | 267 | if (stage != ShaderType::Geometry) |
| 267 | return; | 268 | return; |
| 268 | 269 | ||
| 269 | UNIMPLEMENTED(); | 270 | UNIMPLEMENTED(); |
| 270 | } | 271 | } |
| 271 | 272 | ||
| 272 | void DeclareFragment() { | 273 | void DeclareFragment() { |
| 273 | if (stage != ShaderStage::Fragment) | 274 | if (stage != ShaderType::Fragment) |
| 274 | return; | 275 | return; |
| 275 | 276 | ||
| 276 | for (u32 rt = 0; rt < static_cast<u32>(frag_colors.size()); ++rt) { | 277 | for (u32 rt = 0; rt < static_cast<u32>(frag_colors.size()); ++rt) { |
| @@ -354,7 +355,7 @@ private: | |||
| 354 | continue; | 355 | continue; |
| 355 | } | 356 | } |
| 356 | 357 | ||
| 357 | UNIMPLEMENTED_IF(stage == ShaderStage::Geometry); | 358 | UNIMPLEMENTED_IF(stage == ShaderType::Geometry); |
| 358 | 359 | ||
| 359 | const u32 location = GetGenericAttributeLocation(index); | 360 | const u32 location = GetGenericAttributeLocation(index); |
| 360 | const Id id = OpVariable(t_in_float4, spv::StorageClass::Input); | 361 | const Id id = OpVariable(t_in_float4, spv::StorageClass::Input); |
| @@ -364,7 +365,7 @@ private: | |||
| 364 | 365 | ||
| 365 | Decorate(id, spv::Decoration::Location, location); | 366 | Decorate(id, spv::Decoration::Location, location); |
| 366 | 367 | ||
| 367 | if (stage != ShaderStage::Fragment) { | 368 | if (stage != ShaderType::Fragment) { |
| 368 | continue; | 369 | continue; |
| 369 | } | 370 | } |
| 370 | switch (header.ps.GetAttributeUse(location)) { | 371 | switch (header.ps.GetAttributeUse(location)) { |
| @@ -548,7 +549,7 @@ private: | |||
| 548 | 549 | ||
| 549 | switch (attribute) { | 550 | switch (attribute) { |
| 550 | case Attribute::Index::Position: | 551 | case Attribute::Index::Position: |
| 551 | if (stage != ShaderStage::Fragment) { | 552 | if (stage != ShaderType::Fragment) { |
| 552 | UNIMPLEMENTED(); | 553 | UNIMPLEMENTED(); |
| 553 | break; | 554 | break; |
| 554 | } else { | 555 | } else { |
| @@ -561,7 +562,7 @@ private: | |||
| 561 | // TODO(Subv): Find out what the values are for the first two elements when inside a | 562 | // TODO(Subv): Find out what the values are for the first two elements when inside a |
| 562 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval | 563 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval |
| 563 | // shader. | 564 | // shader. |
| 564 | ASSERT(stage == ShaderStage::Vertex); | 565 | ASSERT(stage == ShaderType::Vertex); |
| 565 | switch (element) { | 566 | switch (element) { |
| 566 | case 2: | 567 | case 2: |
| 567 | return BitcastFrom<Type::Uint>(Emit(OpLoad(t_uint, instance_index))); | 568 | return BitcastFrom<Type::Uint>(Emit(OpLoad(t_uint, instance_index))); |
| @@ -572,7 +573,7 @@ private: | |||
| 572 | return Constant(t_float, 0); | 573 | return Constant(t_float, 0); |
| 573 | case Attribute::Index::FrontFacing: | 574 | case Attribute::Index::FrontFacing: |
| 574 | // TODO(Subv): Find out what the values are for the other elements. | 575 | // TODO(Subv): Find out what the values are for the other elements. |
| 575 | ASSERT(stage == ShaderStage::Fragment); | 576 | ASSERT(stage == ShaderType::Fragment); |
| 576 | if (element == 3) { | 577 | if (element == 3) { |
| 577 | const Id is_front_facing = Emit(OpLoad(t_bool, front_facing)); | 578 | const Id is_front_facing = Emit(OpLoad(t_bool, front_facing)); |
| 578 | const Id true_value = | 579 | const Id true_value = |
| @@ -1075,7 +1076,7 @@ private: | |||
| 1075 | 1076 | ||
| 1076 | Id PreExit() { | 1077 | Id PreExit() { |
| 1077 | switch (stage) { | 1078 | switch (stage) { |
| 1078 | case ShaderStage::Vertex: { | 1079 | case ShaderType::Vertex: { |
| 1079 | // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't | 1080 | // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't |
| 1080 | // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it. | 1081 | // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it. |
| 1081 | const Id z_pointer = AccessElement(t_out_float, per_vertex, position_index, 2u); | 1082 | const Id z_pointer = AccessElement(t_out_float, per_vertex, position_index, 2u); |
| @@ -1085,7 +1086,7 @@ private: | |||
| 1085 | Emit(OpStore(z_pointer, depth)); | 1086 | Emit(OpStore(z_pointer, depth)); |
| 1086 | break; | 1087 | break; |
| 1087 | } | 1088 | } |
| 1088 | case ShaderStage::Fragment: { | 1089 | case ShaderType::Fragment: { |
| 1089 | const auto SafeGetRegister = [&](u32 reg) { | 1090 | const auto SafeGetRegister = [&](u32 reg) { |
| 1090 | // TODO(Rodrigo): Replace with contains once C++20 releases | 1091 | // TODO(Rodrigo): Replace with contains once C++20 releases |
| 1091 | if (const auto it = registers.find(reg); it != registers.end()) { | 1092 | if (const auto it = registers.find(reg); it != registers.end()) { |
| @@ -1511,7 +1512,7 @@ private: | |||
| 1511 | 1512 | ||
| 1512 | const VKDevice& device; | 1513 | const VKDevice& device; |
| 1513 | const ShaderIR& ir; | 1514 | const ShaderIR& ir; |
| 1514 | const ShaderStage stage; | 1515 | const ShaderType stage; |
| 1515 | const Tegra::Shader::Header header; | 1516 | const Tegra::Shader::Header header; |
| 1516 | u64 conditional_nest_count{}; | 1517 | u64 conditional_nest_count{}; |
| 1517 | u64 inside_branch{}; | 1518 | u64 inside_branch{}; |
| @@ -1843,7 +1844,7 @@ void SPIRVDecompiler::DecompileAST() { | |||
| 1843 | } | 1844 | } |
| 1844 | 1845 | ||
| 1845 | DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, | 1846 | DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, |
| 1846 | Maxwell::ShaderStage stage) { | 1847 | ShaderType stage) { |
| 1847 | auto decompiler = std::make_unique<SPIRVDecompiler>(device, ir, stage); | 1848 | auto decompiler = std::make_unique<SPIRVDecompiler>(device, ir, stage); |
| 1848 | decompiler->Decompile(); | 1849 | decompiler->Decompile(); |
| 1849 | return {std::move(decompiler), decompiler->GetShaderEntries()}; | 1850 | return {std::move(decompiler), decompiler->GetShaderEntries()}; |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index f90541cc1..203fc00d0 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -79,6 +79,6 @@ struct ShaderEntries { | |||
| 79 | using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>; | 79 | using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>; |
| 80 | 80 | ||
| 81 | DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, | 81 | DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, |
| 82 | Maxwell::ShaderStage stage); | 82 | Tegra::Engines::ShaderType stage); |
| 83 | 83 | ||
| 84 | } // namespace Vulkan::VKShader | 84 | } // namespace Vulkan::VKShader |
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index fe467608e..b65399f91 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/engines/maxwell_3d.h" | 11 | #include "video_core/engines/maxwell_3d.h" |
| 12 | #include "video_core/engines/shader_type.h" | ||
| 12 | #include "video_core/shader/const_buffer_locker.h" | 13 | #include "video_core/shader/const_buffer_locker.h" |
| 13 | 14 | ||
| 14 | namespace VideoCommon::Shader { | 15 | namespace VideoCommon::Shader { |
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index 600e2f3c3..50a8ce42a 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | #include "common/hash.h" | 9 | #include "common/hash.h" |
| 10 | #include "video_core/engines/const_buffer_engine_interface.h" | 10 | #include "video_core/engines/const_buffer_engine_interface.h" |
| 11 | #include "video_core/engines/shader_type.h" | ||
| 11 | 12 | ||
| 12 | namespace VideoCommon::Shader { | 13 | namespace VideoCommon::Shader { |
| 13 | 14 | ||
| @@ -20,7 +21,7 @@ using BindlessSamplerMap = | |||
| 20 | * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader | 21 | * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader |
| 21 | * compiler. with it, the shader can obtain required data from GPU state and store it for disk | 22 | * compiler. with it, the shader can obtain required data from GPU state and store it for disk |
| 22 | * shader compilation. | 23 | * shader compilation. |
| 23 | **/ | 24 | */ |
| 24 | class ConstBufferLocker { | 25 | class ConstBufferLocker { |
| 25 | public: | 26 | public: |
| 26 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); | 27 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index bb926a132..b094e5a06 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -128,8 +128,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 128 | } | 128 | } |
| 129 | const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); | 129 | const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); |
| 130 | 130 | ||
| 131 | const auto& sampler = | 131 | const SamplerInfo info{TextureType::Texture2D, false, depth_compare}; |
| 132 | GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}}); | 132 | const auto& sampler = GetSampler(instr.sampler, info); |
| 133 | 133 | ||
| 134 | Node4 values; | 134 | Node4 values; |
| 135 | for (u32 element = 0; element < values.size(); ++element) { | 135 | for (u32 element = 0; element < values.size(); ++element) { |
| @@ -149,7 +149,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 149 | // Sadly, not all texture instructions specify the type of texture their sampler | 149 | // Sadly, not all texture instructions specify the type of texture their sampler |
| 150 | // uses. This must be fixed at a later instance. | 150 | // uses. This must be fixed at a later instance. |
| 151 | const auto& sampler = | 151 | const auto& sampler = |
| 152 | is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {}); | 152 | is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler); |
| 153 | 153 | ||
| 154 | u32 indexer = 0; | 154 | u32 indexer = 0; |
| 155 | switch (instr.txq.query_type) { | 155 | switch (instr.txq.query_type) { |
| @@ -185,8 +185,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 185 | auto texture_type = instr.tmml.texture_type.Value(); | 185 | auto texture_type = instr.tmml.texture_type.Value(); |
| 186 | const bool is_array = instr.tmml.array != 0; | 186 | const bool is_array = instr.tmml.array != 0; |
| 187 | const auto& sampler = | 187 | const auto& sampler = |
| 188 | is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}}) | 188 | is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler); |
| 189 | : GetSampler(instr.sampler, {{texture_type, is_array, false}}); | ||
| 190 | 189 | ||
| 191 | std::vector<Node> coords; | 190 | std::vector<Node> coords; |
| 192 | 191 | ||
| @@ -254,67 +253,50 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 254 | return pc; | 253 | return pc; |
| 255 | } | 254 | } |
| 256 | 255 | ||
| 257 | const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, | 256 | ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sampler_info, u32 offset, |
| 258 | std::optional<SamplerInfo> sampler_info) { | 257 | std::optional<u32> buffer) { |
| 259 | const auto offset = static_cast<u32>(sampler.index.Value()); | ||
| 260 | |||
| 261 | TextureType type; | ||
| 262 | bool is_array; | ||
| 263 | bool is_shadow; | ||
| 264 | if (sampler_info) { | 258 | if (sampler_info) { |
| 265 | type = sampler_info->type; | 259 | return *sampler_info; |
| 266 | is_array = sampler_info->is_array; | 260 | } |
| 267 | is_shadow = sampler_info->is_shadow; | 261 | const auto sampler = |
| 268 | } else if (const auto sampler = locker.ObtainBoundSampler(offset)) { | 262 | buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset); |
| 269 | type = sampler->texture_type.Value(); | 263 | if (!sampler) { |
| 270 | is_array = sampler->is_array.Value() != 0; | ||
| 271 | is_shadow = sampler->is_shadow.Value() != 0; | ||
| 272 | } else { | ||
| 273 | LOG_WARNING(HW_GPU, "Unknown sampler info"); | 264 | LOG_WARNING(HW_GPU, "Unknown sampler info"); |
| 274 | type = TextureType::Texture2D; | 265 | return SamplerInfo{TextureType::Texture2D, false, false, false}; |
| 275 | is_array = false; | ||
| 276 | is_shadow = false; | ||
| 277 | } | 266 | } |
| 267 | return SamplerInfo{sampler->texture_type, sampler->is_array != 0, sampler->is_shadow != 0, | ||
| 268 | sampler->is_buffer != 0}; | ||
| 269 | } | ||
| 270 | |||
| 271 | const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, | ||
| 272 | std::optional<SamplerInfo> sampler_info) { | ||
| 273 | const auto offset = static_cast<u32>(sampler.index.Value()); | ||
| 274 | const auto info = GetSamplerInfo(sampler_info, offset); | ||
| 278 | 275 | ||
| 279 | // If this sampler has already been used, return the existing mapping. | 276 | // If this sampler has already been used, return the existing mapping. |
| 280 | const auto it = | 277 | const auto it = |
| 281 | std::find_if(used_samplers.begin(), used_samplers.end(), | 278 | std::find_if(used_samplers.begin(), used_samplers.end(), |
| 282 | [offset](const Sampler& entry) { return entry.GetOffset() == offset; }); | 279 | [offset](const Sampler& entry) { return entry.GetOffset() == offset; }); |
| 283 | if (it != used_samplers.end()) { | 280 | if (it != used_samplers.end()) { |
| 284 | ASSERT(!it->IsBindless() && it->GetType() == type && it->IsArray() == is_array && | 281 | ASSERT(!it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array && |
| 285 | it->IsShadow() == is_shadow); | 282 | it->IsShadow() == info.is_shadow && it->IsBuffer() == info.is_buffer); |
| 286 | return *it; | 283 | return *it; |
| 287 | } | 284 | } |
| 288 | 285 | ||
| 289 | // Otherwise create a new mapping for this sampler | 286 | // Otherwise create a new mapping for this sampler |
| 290 | const auto next_index = static_cast<u32>(used_samplers.size()); | 287 | const auto next_index = static_cast<u32>(used_samplers.size()); |
| 291 | return used_samplers.emplace_back(Sampler(next_index, offset, type, is_array, is_shadow)); | 288 | return used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow, |
| 289 | info.is_buffer); | ||
| 292 | } | 290 | } |
| 293 | 291 | ||
| 294 | const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, | 292 | const Sampler& ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, |
| 295 | std::optional<SamplerInfo> sampler_info) { | 293 | std::optional<SamplerInfo> sampler_info) { |
| 296 | const Node sampler_register = GetRegister(reg); | 294 | const Node sampler_register = GetRegister(reg); |
| 297 | const auto [base_sampler, buffer, offset] = | 295 | const auto [base_sampler, buffer, offset] = |
| 298 | TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); | 296 | TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); |
| 299 | ASSERT(base_sampler != nullptr); | 297 | ASSERT(base_sampler != nullptr); |
| 300 | 298 | ||
| 301 | TextureType type; | 299 | const auto info = GetSamplerInfo(sampler_info, offset, buffer); |
| 302 | bool is_array; | ||
| 303 | bool is_shadow; | ||
| 304 | if (sampler_info) { | ||
| 305 | type = sampler_info->type; | ||
| 306 | is_array = sampler_info->is_array; | ||
| 307 | is_shadow = sampler_info->is_shadow; | ||
| 308 | } else if (const auto sampler = locker.ObtainBindlessSampler(buffer, offset)) { | ||
| 309 | type = sampler->texture_type.Value(); | ||
| 310 | is_array = sampler->is_array.Value() != 0; | ||
| 311 | is_shadow = sampler->is_shadow.Value() != 0; | ||
| 312 | } else { | ||
| 313 | LOG_WARNING(HW_GPU, "Unknown sampler info"); | ||
| 314 | type = TextureType::Texture2D; | ||
| 315 | is_array = false; | ||
| 316 | is_shadow = false; | ||
| 317 | } | ||
| 318 | 300 | ||
| 319 | // If this sampler has already been used, return the existing mapping. | 301 | // If this sampler has already been used, return the existing mapping. |
| 320 | const auto it = | 302 | const auto it = |
| @@ -323,15 +305,15 @@ const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, | |||
| 323 | return entry.GetBuffer() == buffer && entry.GetOffset() == offset; | 305 | return entry.GetBuffer() == buffer && entry.GetOffset() == offset; |
| 324 | }); | 306 | }); |
| 325 | if (it != used_samplers.end()) { | 307 | if (it != used_samplers.end()) { |
| 326 | ASSERT(it->IsBindless() && it->GetType() == type && it->IsArray() == is_array && | 308 | ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array && |
| 327 | it->IsShadow() == is_shadow); | 309 | it->IsShadow() == info.is_shadow); |
| 328 | return *it; | 310 | return *it; |
| 329 | } | 311 | } |
| 330 | 312 | ||
| 331 | // Otherwise create a new mapping for this sampler | 313 | // Otherwise create a new mapping for this sampler |
| 332 | const auto next_index = static_cast<u32>(used_samplers.size()); | 314 | const auto next_index = static_cast<u32>(used_samplers.size()); |
| 333 | return used_samplers.emplace_back( | 315 | return used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, |
| 334 | Sampler(next_index, offset, buffer, type, is_array, is_shadow)); | 316 | info.is_shadow, info.is_buffer); |
| 335 | } | 317 | } |
| 336 | 318 | ||
| 337 | void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { | 319 | void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { |
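The refactor above collapses the duplicated type/is_array/is_shadow resolution that GetSampler and GetBindlessSampler used to carry into a single GetSamplerInfo helper: use the decoder's explicit hint when one exists, otherwise query the locker (bound or bindless depending on whether a constant-buffer index is supplied), and as a last resort warn and assume a plain 2D texture. A self-contained sketch of that flow with simplified stand-in types (SamplerInfo, QueryLocker and TextureType here are illustrative, not the engine API):

#include <cstdint>
#include <iostream>
#include <optional>

enum class TextureType { Texture1D, Texture2D, Texture3D, TextureCube };

struct SamplerInfo {
    TextureType type = TextureType::Texture2D;
    bool is_array = false;
    bool is_shadow = false;
    bool is_buffer = false;
};

// Stand-in for the locker query: would return the descriptor the bound GPU
// state knows for (buffer, offset). Nothing is bound in this sketch.
std::optional<SamplerInfo> QueryLocker(std::optional<std::uint32_t> /*buffer*/,
                                       std::uint32_t /*offset*/) {
    return std::nullopt;
}

SamplerInfo GetSamplerInfo(std::optional<SamplerInfo> sampler_info, std::uint32_t offset,
                           std::optional<std::uint32_t> buffer = std::nullopt) {
    // 1. The decoder already knows the type (e.g. TLD4S is always 2D): use it.
    if (sampler_info) {
        return *sampler_info;
    }
    // 2. Otherwise ask the execution context; the buffer argument selects the
    //    bindless path, its absence the bound path.
    if (const auto queried = QueryLocker(buffer, offset)) {
        return *queried;
    }
    // 3. Last resort: warn and fall back to a plain 2D texture.
    std::cerr << "Unknown sampler info\n";
    return SamplerInfo{TextureType::Texture2D, false, false, false};
}

int main() {
    const SamplerInfo info = GetSamplerInfo(std::nullopt, 8);
    std::cout << std::boolalpha << info.is_array << '\n'; // false, via the fallback path
}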
| @@ -416,17 +398,16 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 416 | (texture_type == TextureType::TextureCube && is_array && is_shadow), | 398 | (texture_type == TextureType::TextureCube && is_array && is_shadow), |
| 417 | "This method is not supported."); | 399 | "This method is not supported."); |
| 418 | 400 | ||
| 401 | const SamplerInfo info{texture_type, is_array, is_shadow, false}; | ||
| 419 | const auto& sampler = | 402 | const auto& sampler = |
| 420 | is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}}) | 403 | is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info); |
| 421 | : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}}); | ||
| 422 | 404 | ||
| 423 | const bool lod_needed = process_mode == TextureProcessMode::LZ || | 405 | const bool lod_needed = process_mode == TextureProcessMode::LZ || |
| 424 | process_mode == TextureProcessMode::LL || | 406 | process_mode == TextureProcessMode::LL || |
| 425 | process_mode == TextureProcessMode::LLA; | 407 | process_mode == TextureProcessMode::LLA; |
| 426 | 408 | ||
| 427 | // LOD selection (either via bias or explicit textureLod) not | 409 | // LOD selection (either via bias or explicit textureLod) not supported in GL for |
| 428 | // supported in GL for sampler2DArrayShadow and | 410 | // sampler2DArrayShadow and samplerCubeArrayShadow. |
| 429 | // samplerCubeArrayShadow. | ||
| 430 | const bool gl_lod_supported = | 411 | const bool gl_lod_supported = |
| 431 | !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || | 412 | !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || |
| 432 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); | 413 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); |
| @@ -436,8 +417,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 436 | 417 | ||
| 437 | UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); | 418 | UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); |
| 438 | 419 | ||
| 439 | Node bias = {}; | 420 | Node bias; |
| 440 | Node lod = {}; | 421 | Node lod; |
| 441 | if (process_mode != TextureProcessMode::None && gl_lod_supported) { | 422 | if (process_mode != TextureProcessMode::None && gl_lod_supported) { |
| 442 | switch (process_mode) { | 423 | switch (process_mode) { |
| 443 | case TextureProcessMode::LZ: | 424 | case TextureProcessMode::LZ: |
| @@ -573,10 +554,9 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||
| 573 | 554 | ||
| 574 | u64 parameter_register = instr.gpr20.Value(); | 555 | u64 parameter_register = instr.gpr20.Value(); |
| 575 | 556 | ||
| 576 | const auto& sampler = | 557 | const SamplerInfo info{texture_type, is_array, depth_compare, false}; |
| 577 | is_bindless | 558 | const auto& sampler = is_bindless ? GetBindlessSampler(parameter_register++, info) |
| 578 | ? GetBindlessSampler(parameter_register++, {{texture_type, is_array, depth_compare}}) | 559 | : GetSampler(instr.sampler, info); |
| 579 | : GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}}); | ||
| 580 | 560 | ||
| 581 | std::vector<Node> aoffi; | 561 | std::vector<Node> aoffi; |
| 582 | if (is_aoffi) { | 562 | if (is_aoffi) { |
| @@ -623,7 +603,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { | |||
| 623 | // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; | 603 | // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; |
| 624 | // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; | 604 | // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; |
| 625 | 605 | ||
| 626 | const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); | 606 | const auto& sampler = GetSampler(instr.sampler); |
| 627 | 607 | ||
| 628 | Node4 values; | 608 | Node4 values; |
| 629 | for (u32 element = 0; element < values.size(); ++element) { | 609 | for (u32 element = 0; element < values.size(); ++element) { |
| @@ -636,6 +616,8 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { | |||
| 636 | } | 616 | } |
| 637 | 617 | ||
| 638 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { | 618 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { |
| 619 | const auto& sampler = GetSampler(instr.sampler); | ||
| 620 | |||
| 639 | const std::size_t type_coord_count = GetCoordCount(texture_type); | 621 | const std::size_t type_coord_count = GetCoordCount(texture_type); |
| 640 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | 622 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; |
| 641 | 623 | ||
| @@ -659,7 +641,14 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | |||
| 659 | // When the LOD is used, it is always in gpr20 | 641 | // When the LOD is used, it is always in gpr20 |
| 660 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); | 642 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); |
| 661 | 643 | ||
| 662 | const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); | 644 | // Fill empty entries from the guest sampler. |
| 645 | const std::size_t entry_coord_count = GetCoordCount(sampler.GetType()); | ||
| 646 | if (type_coord_count != entry_coord_count) { | ||
| 647 | LOG_WARNING(HW_GPU, "Bound and built texture types mismatch"); | ||
| 648 | } | ||
| 649 | for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) { | ||
| 650 | coords.push_back(GetRegister(Register::ZeroIndex)); | ||
| 651 | } | ||
| 663 | 652 | ||
| 664 | Node4 values; | 653 | Node4 values; |
| 665 | for (u32 element = 0; element < values.size(); ++element) { | 654 | for (u32 element = 0; element < values.size(); ++element) { |
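The last two hunks have GetTldsCode fetch its sampler up front and then, when the texture type decoded from the instruction carries fewer coordinates than the type recorded for the bound sampler, pad the coordinate list with zeroes. A rough sketch of that padding step with stand-in types (the real code pushes GetRegister(Register::ZeroIndex) nodes into coords rather than plain integers):

#include <cstddef>
#include <iostream>
#include <vector>

enum class TextureType { Texture1D, Texture2D, Texture3D };

std::size_t GetCoordCount(TextureType type) {
    switch (type) {
    case TextureType::Texture1D:
        return 1;
    case TextureType::Texture2D:
        return 2;
    case TextureType::Texture3D:
        return 3;
    }
    return 2;
}

int main() {
    // The instruction was decoded as 1D, but the bound sampler says 2D.
    const TextureType instr_type = TextureType::Texture1D;
    const TextureType bound_type = TextureType::Texture2D;

    std::vector<int> coords{7}; // one coordinate taken from the instruction
    const std::size_t type_coord_count = GetCoordCount(instr_type);
    const std::size_t entry_coord_count = GetCoordCount(bound_type);
    if (type_coord_count != entry_coord_count) {
        std::cerr << "Bound and built texture types mismatch\n";
    }
    // Fill the missing entries with zero so the generated fetch still has the
    // arity the bound sampler expects.
    for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) {
        coords.push_back(0);
    }
    std::cout << coords.size() << '\n'; // 2
}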
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 54217e6a4..44d85d434 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -225,14 +225,15 @@ class Sampler { | |||
| 225 | public: | 225 | public: |
| 226 | /// This constructor is for bound samplers | 226 | /// This constructor is for bound samplers |
| 227 | constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, | 227 | constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, |
| 228 | bool is_array, bool is_shadow) | 228 | bool is_array, bool is_shadow, bool is_buffer) |
| 229 | : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow} {} | 229 | : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, |
| 230 | is_buffer{is_buffer} {} | ||
| 230 | 231 | ||
| 231 | /// This constructor is for bindless samplers | 232 | /// This constructor is for bindless samplers |
| 232 | constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, | 233 | constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, |
| 233 | bool is_array, bool is_shadow) | 234 | bool is_array, bool is_shadow, bool is_buffer) |
| 234 | : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, | 235 | : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, |
| 235 | is_shadow{is_shadow}, is_bindless{true} {} | 236 | is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true} {} |
| 236 | 237 | ||
| 237 | constexpr u32 GetIndex() const { | 238 | constexpr u32 GetIndex() const { |
| 238 | return index; | 239 | return index; |
| @@ -258,6 +259,10 @@ public: | |||
| 258 | return is_shadow; | 259 | return is_shadow; |
| 259 | } | 260 | } |
| 260 | 261 | ||
| 262 | constexpr bool IsBuffer() const { | ||
| 263 | return is_buffer; | ||
| 264 | } | ||
| 265 | |||
| 261 | constexpr bool IsBindless() const { | 266 | constexpr bool IsBindless() const { |
| 262 | return is_bindless; | 267 | return is_bindless; |
| 263 | } | 268 | } |
| @@ -270,6 +275,7 @@ private: | |||
| 270 | Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) | 275 | Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) |
| 271 | bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. | 276 | bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. |
| 272 | bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. | 277 | bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. |
| 278 | bool is_buffer{}; ///< Whether the texture is a texture buffer without sampler. | ||
| 273 | bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. | 279 | bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. |
| 274 | }; | 280 | }; |
| 275 | 281 | ||
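Both Sampler constructors gain an is_buffer flag, matching the new IsBuffer() accessor and is_buffer member. A cut-down, self-contained version of the two constructors (SamplerSketch and the local TextureType enum are illustrative stand-ins for the class in node.h), showing why the emplace_back call sites in texture.cpp can now forward their arguments directly instead of spelling out Sampler(...):

#include <cstdint>
#include <iostream>
#include <vector>

enum class TextureType { Texture2D, TextureBuffer };

class SamplerSketch {
public:
    // Bound sampler: no constant-buffer index involved.
    constexpr explicit SamplerSketch(std::uint32_t index, std::uint32_t offset, TextureType type,
                                     bool is_array, bool is_shadow, bool is_buffer)
        : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
          is_buffer{is_buffer} {}

    // Bindless sampler: also records which constant buffer the handle came from.
    constexpr explicit SamplerSketch(std::uint32_t index, std::uint32_t offset,
                                     std::uint32_t buffer, TextureType type, bool is_array,
                                     bool is_shadow, bool is_buffer)
        : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
          is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true} {}

    constexpr bool IsBuffer() const {
        return is_buffer;
    }

    constexpr bool IsBindless() const {
        return is_bindless;
    }

private:
    std::uint32_t index{};
    std::uint32_t offset{};
    std::uint32_t buffer{};
    TextureType type{};
    bool is_array{};
    bool is_shadow{};
    bool is_buffer{};
    bool is_bindless{};
};

int main() {
    std::vector<SamplerSketch> used_samplers;
    // emplace_back forwards straight into the matching constructor.
    used_samplers.emplace_back(0u, 8u, TextureType::TextureBuffer, false, false, true);
    used_samplers.emplace_back(1u, 16u, 2u, TextureType::Texture2D, false, false, false);
    std::cout << used_samplers[0].IsBuffer() << used_samplers[1].IsBindless() << '\n'; // 11
}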
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 76a849818..2f71a50d2 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -179,6 +179,7 @@ private: | |||
| 179 | Tegra::Shader::TextureType type; | 179 | Tegra::Shader::TextureType type; |
| 180 | bool is_array; | 180 | bool is_array; |
| 181 | bool is_shadow; | 181 | bool is_shadow; |
| 182 | bool is_buffer; | ||
| 182 | }; | 183 | }; |
| 183 | 184 | ||
| 184 | void Decode(); | 185 | void Decode(); |
| @@ -303,13 +304,17 @@ private: | |||
| 303 | /// Returns a predicate combiner operation | 304 | /// Returns a predicate combiner operation |
| 304 | OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); | 305 | OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); |
| 305 | 306 | ||
| 307 | /// Queries the missing sampler info from the execution context. | ||
| 308 | SamplerInfo GetSamplerInfo(std::optional<SamplerInfo> sampler_info, u32 offset, | ||
| 309 | std::optional<u32> buffer = std::nullopt); | ||
| 310 | |||
| 306 | /// Accesses a texture sampler | 311 | /// Accesses a texture sampler |
| 307 | const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, | 312 | const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, |
| 308 | std::optional<SamplerInfo> sampler_info); | 313 | std::optional<SamplerInfo> sampler_info = std::nullopt); |
| 309 | 314 | ||
| 310 | // Accesses a texture sampler for a bindless texture. | 315 | /// Accesses a texture sampler for a bindless texture. |
| 311 | const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, | 316 | const Sampler& GetBindlessSampler(Tegra::Shader::Register reg, |
| 312 | std::optional<SamplerInfo> sampler_info); | 317 | std::optional<SamplerInfo> sampler_info = std::nullopt); |
| 313 | 318 | ||
| 314 | /// Accesses an image. | 319 | /// Accesses an image. |
| 315 | Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); | 320 | Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); |
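With sampler_info now defaulted to std::nullopt (and GetBindlessSampler taking the register by value), call sites that have no type hint can simply drop the trailing {} argument, as the texture.cpp hunks above do. A tiny sketch of the same defaulted-optional pattern (Get and this local SamplerInfo are stand-ins, not the real ShaderIR interface):

#include <iostream>
#include <optional>

struct SamplerInfo {
    int type = 2;
    bool is_array = false;
};

// Mirrors the new declarations: callers with no hint just omit the argument.
SamplerInfo Get(std::optional<SamplerInfo> hint = std::nullopt) {
    return hint.value_or(SamplerInfo{});
}

int main() {
    const SamplerInfo a = Get();                     // was Get({}) before this change
    const SamplerInfo b = Get(SamplerInfo{3, true}); // an explicit hint still wins
    std::cout << a.type << ' ' << b.type << '\n';    // prints "2 3"
}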