diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 8 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_global_cache.cpp | 70 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_global_cache.h | 18 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 22 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 10 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 18 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 44 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.h | 34 | ||||
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 49 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 38 | ||||
| -rw-r--r-- | src/video_core/shader/track.cpp | 76 |
13 files changed, 380 insertions, 14 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 509ca117a..6113e17ff 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -87,6 +87,7 @@ add_library(video_core STATIC | |||
| 87 | shader/decode.cpp | 87 | shader/decode.cpp |
| 88 | shader/shader_ir.cpp | 88 | shader/shader_ir.cpp |
| 89 | shader/shader_ir.h | 89 | shader/shader_ir.h |
| 90 | shader/track.cpp | ||
| 90 | surface.cpp | 91 | surface.cpp |
| 91 | surface.h | 92 | surface.h |
| 92 | textures/astc.cpp | 93 | textures/astc.cpp |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index cdef97bc6..9989825f8 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -208,6 +208,8 @@ enum class UniformType : u64 { | |||
| 208 | SignedShort = 3, | 208 | SignedShort = 3, |
| 209 | Single = 4, | 209 | Single = 4, |
| 210 | Double = 5, | 210 | Double = 5, |
| 211 | Quad = 6, | ||
| 212 | UnsignedQuad = 7, | ||
| 211 | }; | 213 | }; |
| 212 | 214 | ||
| 213 | enum class StoreType : u64 { | 215 | enum class StoreType : u64 { |
| @@ -785,6 +787,12 @@ union Instruction { | |||
| 785 | } st_l; | 787 | } st_l; |
| 786 | 788 | ||
| 787 | union { | 789 | union { |
| 790 | BitField<48, 3, UniformType> type; | ||
| 791 | BitField<46, 2, u64> cache_mode; | ||
| 792 | BitField<20, 24, s64> immediate_offset; | ||
| 793 | } ldg; | ||
| 794 | |||
| 795 | union { | ||
| 788 | BitField<0, 3, u64> pred0; | 796 | BitField<0, 3, u64> pred0; |
| 789 | BitField<3, 3, u64> pred3; | 797 | BitField<3, 3, u64> pred3; |
| 790 | BitField<7, 1, u64> abs_a; | 798 | BitField<7, 1, u64> abs_a; |
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp index 7992b82c4..c7f32feaa 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ b/src/video_core/renderer_opengl/gl_global_cache.cpp | |||
| @@ -4,8 +4,13 @@ | |||
| 4 | 4 | ||
| 5 | #include <glad/glad.h> | 5 | #include <glad/glad.h> |
| 6 | 6 | ||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/logging/log.h" | ||
| 9 | #include "core/core.h" | ||
| 10 | #include "core/memory.h" | ||
| 7 | #include "video_core/renderer_opengl/gl_global_cache.h" | 11 | #include "video_core/renderer_opengl/gl_global_cache.h" |
| 8 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 12 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 13 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 9 | #include "video_core/renderer_opengl/utils.h" | 14 | #include "video_core/renderer_opengl/utils.h" |
| 10 | 15 | ||
| 11 | namespace OpenGL { | 16 | namespace OpenGL { |
| @@ -18,7 +23,72 @@ CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{ | |||
| 18 | LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory"); | 23 | LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory"); |
| 19 | } | 24 | } |
| 20 | 25 | ||
| 26 | void CachedGlobalRegion::Reload(u32 size_) { | ||
| 27 | constexpr auto max_size = static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize); | ||
| 28 | |||
| 29 | size = size_; | ||
| 30 | if (size > max_size) { | ||
| 31 | size = max_size; | ||
| 32 | LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the expected size {}!", size_, | ||
| 33 | max_size); | ||
| 34 | } | ||
| 35 | |||
| 36 | // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer | ||
| 37 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); | ||
| 38 | glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW); | ||
| 39 | } | ||
| 40 | |||
| 41 | GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const { | ||
| 42 | const auto search{reserve.find(addr)}; | ||
| 43 | if (search == reserve.end()) { | ||
| 44 | return {}; | ||
| 45 | } | ||
| 46 | return search->second; | ||
| 47 | } | ||
| 48 | |||
| 49 | GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) { | ||
| 50 | GlobalRegion region{TryGetReservedGlobalRegion(addr, size)}; | ||
| 51 | if (!region) { | ||
| 52 | // No reserved surface available, create a new one and reserve it | ||
| 53 | region = std::make_shared<CachedGlobalRegion>(addr, size); | ||
| 54 | ReserveGlobalRegion(region); | ||
| 55 | } | ||
| 56 | region->Reload(size); | ||
| 57 | return region; | ||
| 58 | } | ||
| 59 | |||
| 60 | void GlobalRegionCacheOpenGL::ReserveGlobalRegion(const GlobalRegion& region) { | ||
| 61 | reserve[region->GetAddr()] = region; | ||
| 62 | } | ||
| 63 | |||
| 21 | GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) | 64 | GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) |
| 22 | : RasterizerCache{rasterizer} {} | 65 | : RasterizerCache{rasterizer} {} |
| 23 | 66 | ||
| 67 | GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( | ||
| 68 | const GLShader::GlobalMemoryEntry& global_region, | ||
| 69 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { | ||
| 70 | |||
| 71 | auto& gpu{Core::System::GetInstance().GPU()}; | ||
| 72 | const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]; | ||
| 73 | const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress( | ||
| 74 | cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset()); | ||
| 75 | ASSERT(cbuf_addr); | ||
| 76 | |||
| 77 | const auto actual_addr_gpu = Memory::Read64(*cbuf_addr); | ||
| 78 | const auto size = Memory::Read32(*cbuf_addr + 8); | ||
| 79 | const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu); | ||
| 80 | ASSERT(actual_addr); | ||
| 81 | |||
| 82 | // Look up global region in the cache based on address | ||
| 83 | GlobalRegion region = TryGet(*actual_addr); | ||
| 84 | |||
| 85 | if (!region) { | ||
| 86 | // No global region found - create a new one | ||
| 87 | region = GetUncachedGlobalRegion(*actual_addr, size); | ||
| 88 | Register(region); | ||
| 89 | } | ||
| 90 | |||
| 91 | return region; | ||
| 92 | } | ||
| 93 | |||
| 24 | } // namespace OpenGL | 94 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h index 406a735bc..37830bb7c 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ b/src/video_core/renderer_opengl/gl_global_cache.h | |||
| @@ -5,9 +5,13 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <unordered_map> | ||
| 9 | |||
| 8 | #include <glad/glad.h> | 10 | #include <glad/glad.h> |
| 9 | 11 | ||
| 12 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 11 | #include "video_core/rasterizer_cache.h" | 15 | #include "video_core/rasterizer_cache.h" |
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 16 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 13 | 17 | ||
| @@ -40,6 +44,9 @@ public: | |||
| 40 | return buffer.handle; | 44 | return buffer.handle; |
| 41 | } | 45 | } |
| 42 | 46 | ||
| 47 | /// Reloads the global region from guest memory | ||
| 48 | void Reload(u32 size_); | ||
| 49 | |||
| 43 | // TODO(Rodrigo): When global memory is written (STG), implement flushing | 50 | // TODO(Rodrigo): When global memory is written (STG), implement flushing |
| 44 | void Flush() override { | 51 | void Flush() override { |
| 45 | UNIMPLEMENTED(); | 52 | UNIMPLEMENTED(); |
| @@ -55,6 +62,17 @@ private: | |||
/// Rasterizer cache of global memory regions, with a side map of reserved regions keyed by address.
| 55 | class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> { | 62 | class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> { |
| 56 | public: | 63 | public: |
| 57 | explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer); | 64 | explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer); |
| 65 | |||
| 66 | /// Gets the global memory region described by the given entry for the specified shader stage | ||
| 67 | GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor, | ||
| 68 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); | ||
| 69 | |||
| 70 | private: | ||
/// Returns the region reserved under addr, or an empty GlobalRegion if there is none
| 71 | GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const; | ||
/// Returns a reserved (or newly created and reserved) region after reloading it from guest memory
| 72 | GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size); | ||
/// Stores the region in the reserve map under its own address
| 73 | void ReserveGlobalRegion(const GlobalRegion& region); | ||
| 74 | |||
/// Reserved regions indexed by the address they were created for
| 75 | std::unordered_map<VAddr, GlobalRegion> reserve; | ||
| 58 | }; | 76 | }; |
| 59 | 77 | ||
| 60 | } // namespace OpenGL | 78 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 71829fee0..ca421ef28 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -300,6 +300,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 300 | // Next available bindpoints to use when uploading the const buffers and textures to the GLSL | 300 | // Next available bindpoints to use when uploading the const buffers and textures to the GLSL |
| 301 | // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. | 301 | // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. |
| 302 | u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; | 302 | u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; |
| 303 | u32 current_gmem_bindpoint = 0; | ||
| 303 | u32 current_texture_bindpoint = 0; | 304 | u32 current_texture_bindpoint = 0; |
| 304 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 305 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 305 | 306 | ||
| @@ -358,6 +359,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 358 | SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode, | 359 | SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode, |
| 359 | current_constbuffer_bindpoint); | 360 | current_constbuffer_bindpoint); |
| 360 | 361 | ||
| 362 | // Configure global memory regions for this shader stage. | ||
| 363 | current_gmem_bindpoint = SetupGlobalRegions(static_cast<Maxwell::ShaderStage>(stage), | ||
| 364 | shader, primitive_mode, current_gmem_bindpoint); | ||
| 365 | |||
| 361 | // Configure the textures for this shader stage. | 366 | // Configure the textures for this shader stage. |
| 362 | current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader, | 367 | current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader, |
| 363 | primitive_mode, current_texture_bindpoint); | 368 | primitive_mode, current_texture_bindpoint); |
| @@ -993,6 +998,23 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad | |||
| 993 | return current_bindpoint + static_cast<u32>(entries.size()); | 998 | return current_bindpoint + static_cast<u32>(entries.size()); |
| 994 | } | 999 | } |
| 995 | 1000 | ||
| 1001 | u32 RasterizerOpenGL::SetupGlobalRegions(Maxwell::ShaderStage stage, Shader& shader, | ||
| 1002 | GLenum primitive_mode, u32 current_bindpoint) { | ||
| 1003 | for (const auto& global_region : shader->GetShaderEntries().global_memory_entries) { | ||
| 1004 | const auto& region = | ||
| 1005 | global_cache.GetGlobalRegion(global_region, static_cast<Maxwell::ShaderStage>(stage)); | ||
| 1006 | const GLuint block_index{shader->GetProgramResourceIndex(global_region)}; | ||
| 1007 | ASSERT(block_index != GL_INVALID_INDEX); | ||
| 1008 | |||
| 1009 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle()); | ||
| 1010 | glShaderStorageBlockBinding(shader->GetProgramHandle(primitive_mode), block_index, | ||
| 1011 | current_bindpoint); | ||
| 1012 | ++current_bindpoint; | ||
| 1013 | } | ||
| 1014 | |||
| 1015 | return current_bindpoint; | ||
| 1016 | } | ||
| 1017 | |||
| 996 | u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, | 1018 | u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, |
| 997 | GLenum primitive_mode, u32 current_unit) { | 1019 | GLenum primitive_mode, u32 current_unit) { |
| 998 | MICROPROFILE_SCOPE(OpenGL_Texture); | 1020 | MICROPROFILE_SCOPE(OpenGL_Texture); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 21c51f874..57ab2f627 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -138,6 +138,16 @@ private: | |||
| 138 | GLenum primitive_mode, u32 current_bindpoint); | 138 | GLenum primitive_mode, u32 current_bindpoint); |
| 139 | 139 | ||
| 140 | /** | 140 | /** |
| 141 | * Configures the current global memory regions to use for the draw command. | ||
| 142 | * @param stage The shader stage to configure buffers for. | ||
| 143 | * @param shader The shader object that contains the specified stage. | ||
| 144 | * @param current_bindpoint The offset at which to start counting new buffer bindpoints. | ||
| 145 | * @returns The next available bindpoint for use in the next shader stage. | ||
| 146 | */ | ||
| 147 | u32 SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader, | ||
| 148 | GLenum primitive_mode, u32 current_bindpoint); | ||
| 149 | |||
| 150 | /** | ||
| 141 | * Configures the current textures to use for the draw command. | 151 | * Configures the current textures to use for the draw command. |
| 142 | * @param stage The shader stage to configure textures for. | 152 | * @param stage The shader stage to configure textures for. |
| 143 | * @param shader The shader object that contains the specified stage. | 153 | * @param shader The shader object that contains the specified stage. |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b3aca39af..54ec23f3a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -108,11 +108,23 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) | |||
| 108 | } | 108 | } |
| 109 | 109 | ||
| 110 | GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) { | 110 | GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) { |
| 111 | const auto search{resource_cache.find(buffer.GetHash())}; | 111 | const auto search{cbuf_resource_cache.find(buffer.GetHash())}; |
| 112 | if (search == resource_cache.end()) { | 112 | if (search == cbuf_resource_cache.end()) { |
| 113 | const GLuint index{ | 113 | const GLuint index{ |
| 114 | glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())}; | 114 | glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())}; |
| 115 | resource_cache[buffer.GetHash()] = index; | 115 | cbuf_resource_cache[buffer.GetHash()] = index; |
| 116 | return index; | ||
| 117 | } | ||
| 118 | |||
| 119 | return search->second; | ||
| 120 | } | ||
| 121 | |||
| 122 | GLuint CachedShader::GetProgramResourceIndex(const GLShader::GlobalMemoryEntry& global_mem) { | ||
| 123 | const auto search{gmem_resource_cache.find(global_mem.GetHash())}; | ||
| 124 | if (search == gmem_resource_cache.end()) { | ||
| 125 | const GLuint index{glGetProgramResourceIndex(program.handle, GL_SHADER_STORAGE_BLOCK, | ||
| 126 | global_mem.GetName().c_str())}; | ||
| 127 | gmem_resource_cache[global_mem.GetHash()] = index; | ||
| 116 | return index; | 128 | return index; |
| 117 | } | 129 | } |
| 118 | 130 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index e0887dd7b..62b1733b4 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -76,6 +76,9 @@ public: | |||
| 76 | /// Gets the GL program resource location for the specified resource, caching as needed | 76 | /// Gets the GL program resource location for the specified resource, caching as needed |
| 77 | GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer); | 77 | GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer); |
| 78 | 78 | ||
| 79 | /// Gets the GL program resource location for the specified resource, caching as needed | ||
| 80 | GLuint GetProgramResourceIndex(const GLShader::GlobalMemoryEntry& global_mem); | ||
| 81 | |||
| 79 | /// Gets the GL uniform location for the specified resource, caching as needed | 82 | /// Gets the GL uniform location for the specified resource, caching as needed |
| 80 | GLint GetUniformLocation(const GLShader::SamplerEntry& sampler); | 83 | GLint GetUniformLocation(const GLShader::SamplerEntry& sampler); |
| 81 | 84 | ||
| @@ -107,7 +110,8 @@ private: | |||
| 107 | OGLProgram triangles_adjacency; | 110 | OGLProgram triangles_adjacency; |
| 108 | } geometry_programs; | 111 | } geometry_programs; |
| 109 | 112 | ||
| 110 | std::map<u32, GLuint> resource_cache; | 113 | std::map<u32, GLuint> cbuf_resource_cache; |
| 114 | std::map<u32, GLuint> gmem_resource_cache; | ||
| 111 | std::map<u32, GLint> uniform_cache; | 115 | std::map<u32, GLint> uniform_cache; |
| 112 | }; | 116 | }; |
| 113 | 117 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 3411cf9e6..e072216f0 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -34,6 +34,8 @@ using Operation = const OperationNode&; | |||
| 34 | enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; | 34 | enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; |
| 35 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = | 35 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = |
| 36 | static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); | 36 | static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); |
| 37 | constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = | ||
| 38 | static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); | ||
| 37 | 39 | ||
| 38 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | 40 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; |
| 39 | 41 | ||
| @@ -143,6 +145,7 @@ public: | |||
| 143 | DeclareInputAttributes(); | 145 | DeclareInputAttributes(); |
| 144 | DeclareOutputAttributes(); | 146 | DeclareOutputAttributes(); |
| 145 | DeclareConstantBuffers(); | 147 | DeclareConstantBuffers(); |
| 148 | DeclareGlobalMemory(); | ||
| 146 | DeclareSamplers(); | 149 | DeclareSamplers(); |
| 147 | 150 | ||
| 148 | code.AddLine("void execute_" + suffix + "() {"); | 151 | code.AddLine("void execute_" + suffix + "() {"); |
| @@ -190,12 +193,15 @@ public: | |||
| 190 | ShaderEntries GetShaderEntries() const { | 193 | ShaderEntries GetShaderEntries() const { |
| 191 | ShaderEntries entries; | 194 | ShaderEntries entries; |
| 192 | for (const auto& cbuf : ir.GetConstantBuffers()) { | 195 | for (const auto& cbuf : ir.GetConstantBuffers()) { |
| 193 | ConstBufferEntry desc(cbuf.second, stage, GetConstBufferBlock(cbuf.first), cbuf.first); | 196 | entries.const_buffers.emplace_back(cbuf.second, stage, GetConstBufferBlock(cbuf.first), |
| 194 | entries.const_buffers.push_back(desc); | 197 | cbuf.first); |
| 195 | } | 198 | } |
| 196 | for (const auto& sampler : ir.GetSamplers()) { | 199 | for (const auto& sampler : ir.GetSamplers()) { |
| 197 | SamplerEntry desc(sampler, stage, GetSampler(sampler)); | 200 | entries.samplers.emplace_back(sampler, stage, GetSampler(sampler)); |
| 198 | entries.samplers.push_back(desc); | 201 | } |
| 202 | for (const auto& gmem : ir.GetGlobalMemoryBases()) { | ||
| 203 | entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset, stage, | ||
| 204 | GetGlobalMemoryBlock(gmem)); | ||
| 199 | } | 205 | } |
| 200 | entries.clip_distances = ir.GetClipDistances(); | 206 | entries.clip_distances = ir.GetClipDistances(); |
| 201 | entries.shader_length = ir.GetLength(); | 207 | entries.shader_length = ir.GetLength(); |
| @@ -375,6 +381,15 @@ private: | |||
| 375 | } | 381 | } |
| 376 | } | 382 | } |
| 377 | 383 | ||
| 384 | void DeclareGlobalMemory() { | ||
| 385 | for (const auto& entry : ir.GetGlobalMemoryBases()) { | ||
| 386 | code.AddLine("layout (std430) buffer " + GetGlobalMemoryBlock(entry) + " {"); | ||
| 387 | code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];"); | ||
| 388 | code.AddLine("};"); | ||
| 389 | code.AddNewLine(); | ||
| 390 | } | ||
| 391 | } | ||
| 392 | |||
| 378 | void DeclareSamplers() { | 393 | void DeclareSamplers() { |
| 379 | const auto& samplers = ir.GetSamplers(); | 394 | const auto& samplers = ir.GetSamplers(); |
| 380 | for (const auto& sampler : samplers) { | 395 | for (const auto& sampler : samplers) { |
| @@ -538,6 +553,12 @@ private: | |||
| 538 | UNREACHABLE_MSG("Unmanaged offset node type"); | 553 | UNREACHABLE_MSG("Unmanaged offset node type"); |
| 539 | } | 554 | } |
| 540 | 555 | ||
| 556 | } else if (const auto gmem = std::get_if<GmemNode>(node)) { | ||
| 557 | const std::string real = Visit(gmem->GetRealAddress()); | ||
| 558 | const std::string base = Visit(gmem->GetBaseAddress()); | ||
| 559 | const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4"; | ||
| 560 | return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); | ||
| 561 | |||
| 541 | } else if (const auto lmem = std::get_if<LmemNode>(node)) { | 562 | } else if (const auto lmem = std::get_if<LmemNode>(node)) { |
| 542 | return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); | 563 | return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); |
| 543 | 564 | ||
| @@ -1471,6 +1492,15 @@ private: | |||
| 1471 | return GetDeclarationWithSuffix(index, "cbuf"); | 1492 | return GetDeclarationWithSuffix(index, "cbuf"); |
| 1472 | } | 1493 | } |
| 1473 | 1494 | ||
| 1495 | std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { | ||
| 1496 | return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix); | ||
| 1497 | } | ||
| 1498 | |||
| 1499 | std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const { | ||
| 1500 | return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, | ||
| 1501 | suffix); | ||
| 1502 | } | ||
| 1503 | |||
| 1474 | std::string GetConstBufferBlock(u32 index) const { | 1504 | std::string GetConstBufferBlock(u32 index) const { |
| 1475 | return GetDeclarationWithSuffix(index, "cbuf_block"); | 1505 | return GetDeclarationWithSuffix(index, "cbuf_block"); |
| 1476 | } | 1506 | } |
| @@ -1505,8 +1535,10 @@ private: | |||
| 1505 | }; | 1535 | }; |
| 1506 | 1536 | ||
| 1507 | std::string GetCommonDeclarations() { | 1537 | std::string GetCommonDeclarations() { |
| 1508 | return "#define MAX_CONSTBUFFER_ELEMENTS " + std::to_string(MAX_CONSTBUFFER_ELEMENTS) + | 1538 | const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); |
| 1509 | "\n" | 1539 | const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); |
| 1540 | return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" + | ||
| 1541 | "#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" + | ||
| 1510 | "#define ftoi floatBitsToInt\n" | 1542 | "#define ftoi floatBitsToInt\n" |
| 1511 | "#define ftou floatBitsToUint\n" | 1543 | "#define ftou floatBitsToUint\n" |
| 1512 | "#define itof intBitsToFloat\n" | 1544 | "#define itof intBitsToFloat\n" |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 396a560d8..e47bc3729 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -71,9 +71,43 @@ private: | |||
| 71 | Maxwell::ShaderStage stage{}; | 71 | Maxwell::ShaderStage stage{}; |
| 72 | }; | 72 | }; |
| 73 | 73 | ||
| 74 | class GlobalMemoryEntry { | ||
| 75 | public: | ||
| 76 | explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, Maxwell::ShaderStage stage, | ||
| 77 | std::string name) | ||
| 78 | : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, stage{stage}, name{std::move(name)} {} | ||
| 79 | |||
| 80 | u32 GetCbufIndex() const { | ||
| 81 | return cbuf_index; | ||
| 82 | } | ||
| 83 | |||
| 84 | u32 GetCbufOffset() const { | ||
| 85 | return cbuf_offset; | ||
| 86 | } | ||
| 87 | |||
| 88 | const std::string& GetName() const { | ||
| 89 | return name; | ||
| 90 | } | ||
| 91 | |||
| 92 | Maxwell::ShaderStage GetStage() const { | ||
| 93 | return stage; | ||
| 94 | } | ||
| 95 | |||
| 96 | u32 GetHash() const { | ||
| 97 | return (static_cast<u32>(stage) << 24) | (cbuf_index << 16) | cbuf_offset; | ||
| 98 | } | ||
| 99 | |||
| 100 | private: | ||
| 101 | u32 cbuf_index{}; | ||
| 102 | u32 cbuf_offset{}; | ||
| 103 | Maxwell::ShaderStage stage{}; | ||
| 104 | std::string name; | ||
| 105 | }; | ||
| 106 | |||
| 74 | struct ShaderEntries { | 107 | struct ShaderEntries { |
| 75 | std::vector<ConstBufferEntry> const_buffers; | 108 | std::vector<ConstBufferEntry> const_buffers; |
| 76 | std::vector<SamplerEntry> samplers; | 109 | std::vector<SamplerEntry> samplers; |
| 110 | std::vector<GlobalMemoryEntry> global_memory_entries; | ||
| 77 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 111 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 78 | std::size_t shader_length{}; | 112 | std::size_t shader_length{}; |
| 79 | }; | 113 | }; |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index ae71672d6..04cb386b7 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <vector> | 6 | #include <vector> |
| 7 | #include <fmt/format.h> | ||
| 7 | 8 | ||
| 8 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| @@ -119,6 +120,54 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { | |||
| 119 | } | 120 | } |
| 120 | break; | 121 | break; |
| 121 | } | 122 | } |
| 123 | case OpCode::Id::LDG: { | ||
| 124 | const u32 count = [&]() { | ||
| 125 | switch (instr.ldg.type) { | ||
| 126 | case Tegra::Shader::UniformType::Single: | ||
| 127 | return 1; | ||
| 128 | case Tegra::Shader::UniformType::Double: | ||
| 129 | return 2; | ||
| 130 | case Tegra::Shader::UniformType::Quad: | ||
| 131 | case Tegra::Shader::UniformType::UnsignedQuad: | ||
| 132 | return 4; | ||
| 133 | default: | ||
| 134 | UNIMPLEMENTED_MSG("Unimplemented LDG size!"); | ||
| 135 | return 1; | ||
| 136 | } | ||
| 137 | }(); | ||
| 138 | |||
| 139 | const Node addr_register = GetRegister(instr.gpr8); | ||
| 140 | const Node base_address = TrackCbuf(addr_register, code, static_cast<s64>(code.size())); | ||
| 141 | const auto cbuf = std::get_if<CbufNode>(base_address); | ||
| 142 | ASSERT(cbuf != nullptr); | ||
| 143 | const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); | ||
| 144 | ASSERT(cbuf_offset_imm != nullptr); | ||
| 145 | const auto cbuf_offset = cbuf_offset_imm->GetValue() * 4; | ||
| 146 | |||
| 147 | bb.push_back(Comment( | ||
| 148 | fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); | ||
| 149 | |||
| 150 | const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; | ||
| 151 | used_global_memory_bases.insert(descriptor); | ||
| 152 | |||
| 153 | const Node immediate_offset = | ||
| 154 | Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value())); | ||
| 155 | const Node base_real_address = | ||
| 156 | Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register); | ||
| 157 | |||
| 158 | for (u32 i = 0; i < count; ++i) { | ||
| 159 | const Node it_offset = Immediate(i * 4); | ||
| 160 | const Node real_address = | ||
| 161 | Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset); | ||
| 162 | const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); | ||
| 163 | |||
| 164 | SetTemporal(bb, i, gmem); | ||
| 165 | } | ||
| 166 | for (u32 i = 0; i < count; ++i) { | ||
| 167 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 168 | } | ||
| 169 | break; | ||
| 170 | } | ||
| 122 | case OpCode::Id::ST_A: { | 171 | case OpCode::Id::ST_A: { |
| 123 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | 172 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, |
| 124 | "Indirect attribute loads are not supported"); | 173 | "Indirect attribute loads are not supported"); |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ef8f94480..c4ecb2e3c 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -257,6 +257,15 @@ private: | |||
| 257 | bool is_indirect{}; | 257 | bool is_indirect{}; |
| 258 | }; | 258 | }; |
| 259 | 259 | ||
| 260 | struct GlobalMemoryBase { | ||
| 261 | u32 cbuf_index{}; | ||
| 262 | u32 cbuf_offset{}; | ||
| 263 | |||
| 264 | bool operator<(const GlobalMemoryBase& rhs) const { | ||
| 265 | return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); | ||
| 266 | } | ||
| 267 | }; | ||
| 268 | |||
| 260 | struct MetaArithmetic { | 269 | struct MetaArithmetic { |
| 261 | bool precise{}; | 270 | bool precise{}; |
| 262 | }; | 271 | }; |
| @@ -478,14 +487,26 @@ private: | |||
| 478 | /// Global memory node | 487 | /// Global memory node |
| 479 | class GmemNode final { | 488 | class GmemNode final { |
| 480 | public: | 489 | public: |
| 481 | explicit constexpr GmemNode(Node address) : address{address} {} | 490 | explicit constexpr GmemNode(Node real_address, Node base_address, |
| 491 | const GlobalMemoryBase& descriptor) | ||
| 492 | : real_address{real_address}, base_address{base_address}, descriptor{descriptor} {} | ||
| 482 | 493 | ||
| 483 | Node GetAddress() const { | 494 | Node GetRealAddress() const { |
| 484 | return address; | 495 | return real_address; |
| 496 | } | ||
| 497 | |||
| 498 | Node GetBaseAddress() const { | ||
| 499 | return base_address; | ||
| 500 | } | ||
| 501 | |||
| 502 | const GlobalMemoryBase& GetDescriptor() const { | ||
| 503 | return descriptor; | ||
| 485 | } | 504 | } |
| 486 | 505 | ||
| 487 | private: | 506 | private: |
| 488 | const Node address; | 507 | const Node real_address; |
| 508 | const Node base_address; | ||
| 509 | const GlobalMemoryBase descriptor; | ||
| 489 | }; | 510 | }; |
| 490 | 511 | ||
| 491 | /// Commentary, can be dropped | 512 | /// Commentary, can be dropped |
| @@ -543,6 +564,10 @@ public: | |||
| 543 | return used_clip_distances; | 564 | return used_clip_distances; |
| 544 | } | 565 | } |
| 545 | 566 | ||
| 567 | const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const { | ||
| 568 | return used_global_memory_bases; | ||
| 569 | } | ||
| 570 | |||
| 546 | std::size_t GetLength() const { | 571 | std::size_t GetLength() const { |
| 547 | return static_cast<std::size_t>(coverage_end * sizeof(u64)); | 572 | return static_cast<std::size_t>(coverage_end * sizeof(u64)); |
| 548 | } | 573 | } |
| @@ -734,6 +759,10 @@ private: | |||
| 734 | void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, | 759 | void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, |
| 735 | Node op_c, Node imm_lut, bool sets_cc); | 760 | Node op_c, Node imm_lut, bool sets_cc); |
| 736 | 761 | ||
| 762 | Node TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor); | ||
| 763 | |||
| 764 | std::pair<Node, s64> TrackRegister(const GprNode* tracked, const BasicBlock& code, s64 cursor); | ||
| 765 | |||
| 737 | template <typename... T> | 766 | template <typename... T> |
| 738 | Node Operation(OperationCode code, const T*... operands) { | 767 | Node Operation(OperationCode code, const T*... operands) { |
| 739 | return StoreNode(OperationNode(code, operands...)); | 768 | return StoreNode(OperationNode(code, operands...)); |
| @@ -786,6 +815,7 @@ private: | |||
| 786 | std::map<u32, ConstBuffer> used_cbufs; | 815 | std::map<u32, ConstBuffer> used_cbufs; |
| 787 | std::set<Sampler> used_samplers; | 816 | std::set<Sampler> used_samplers; |
| 788 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; | 817 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; |
| 818 | std::set<GlobalMemoryBase> used_global_memory_bases; | ||
| 789 | 819 | ||
| 790 | Tegra::Shader::Header header; | 820 | Tegra::Shader::Header header; |
| 791 | }; | 821 | }; |
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp new file mode 100644 index 000000000..d6d29ee9f --- /dev/null +++ b/src/video_core/shader/track.cpp | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <utility> | ||
| 7 | #include <variant> | ||
| 8 | |||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | namespace { | ||
| 14 | std::pair<Node, s64> FindOperation(const BasicBlock& code, s64 cursor, | ||
| 15 | OperationCode operation_code) { | ||
| 16 | for (; cursor >= 0; --cursor) { | ||
| 17 | const Node node = code[cursor]; | ||
| 18 | if (const auto operation = std::get_if<OperationNode>(node)) { | ||
| 19 | if (operation->GetCode() == operation_code) | ||
| 20 | return {node, cursor}; | ||
| 21 | } | ||
| 22 | } | ||
| 23 | return {}; | ||
| 24 | } | ||
| 25 | } // namespace | ||
| 26 | |||
| 27 | Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) { | ||
| 28 | if (const auto cbuf = std::get_if<CbufNode>(tracked)) { | ||
| 29 | // Cbuf found, but it has to be immediate | ||
| 30 | return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; | ||
| 31 | } | ||
| 32 | if (const auto gpr = std::get_if<GprNode>(tracked)) { | ||
| 33 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { | ||
| 34 | return nullptr; | ||
| 35 | } | ||
| 36 | // Reduce the cursor in one to avoid infinite loops when the instruction sets the same | ||
| 37 | // register that it uses as operand | ||
| 38 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); | ||
| 39 | if (!source) { | ||
| 40 | return nullptr; | ||
| 41 | } | ||
| 42 | return TrackCbuf(source, code, new_cursor); | ||
| 43 | } | ||
| 44 | if (const auto operation = std::get_if<OperationNode>(tracked)) { | ||
| 45 | for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { | ||
| 46 | if (const auto found = TrackCbuf((*operation)[i], code, cursor)) { | ||
| 47 | // Cbuf found in operand | ||
| 48 | return found; | ||
| 49 | } | ||
| 50 | } | ||
| 51 | return nullptr; | ||
| 52 | } | ||
| 53 | return nullptr; | ||
| 54 | } | ||
| 55 | |||
| 56 | std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const BasicBlock& code, | ||
| 57 | s64 cursor) { | ||
| 58 | for (; cursor >= 0; --cursor) { | ||
| 59 | const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign); | ||
| 60 | if (!found_node) { | ||
| 61 | return {}; | ||
| 62 | } | ||
| 63 | const auto operation = std::get_if<OperationNode>(found_node); | ||
| 64 | ASSERT(operation); | ||
| 65 | |||
| 66 | const auto& target = (*operation)[0]; | ||
| 67 | if (const auto gpr_target = std::get_if<GprNode>(target)) { | ||
| 68 | if (gpr_target->GetIndex() == tracked->GetIndex()) { | ||
| 69 | return {(*operation)[1], new_cursor}; | ||
| 70 | } | ||
| 71 | } | ||
| 72 | } | ||
| 73 | return {}; | ||
| 74 | } | ||
| 75 | |||
| 76 | } // namespace VideoCommon::Shader | ||