diff options
| author | 2018-08-27 20:35:58 -0400 | |
|---|---|---|
| committer | 2018-08-27 20:35:58 -0400 | |
| commit | ffe2336136dc683b8d97a355c2446aad2aaa5905 (patch) | |
| tree | 62cc8e3ff2dcf995f9f3ae7c8928a27dada3733d /src | |
| parent | Merge pull request #1189 from FearlessTobi/fix-stick-directions (diff) | |
| parent | renderer_opengl: Implement a new shader cache. (diff) | |
| download | yuzu-ffe2336136dc683b8d97a355c2446aad2aaa5905.tar.gz yuzu-ffe2336136dc683b8d97a355c2446aad2aaa5905.tar.xz yuzu-ffe2336136dc683b8d97a355c2446aad2aaa5905.zip | |
Merge pull request #1165 from bunnei/shader-cache
renderer_opengl: Implement a new shader cache.
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 3 | ||||
| -rw-r--r-- | src/video_core/rasterizer_cache.h | 116 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 89 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 15 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 110 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 37 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 131 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 69 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 75 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_manager.cpp | 29 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_manager.h | 126 |
12 files changed, 387 insertions, 417 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c6431e722..aa5bc3bbe 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -18,6 +18,7 @@ add_library(video_core STATIC | |||
| 18 | macro_interpreter.h | 18 | macro_interpreter.h |
| 19 | memory_manager.cpp | 19 | memory_manager.cpp |
| 20 | memory_manager.h | 20 | memory_manager.h |
| 21 | rasterizer_cache.h | ||
| 21 | rasterizer_interface.h | 22 | rasterizer_interface.h |
| 22 | renderer_base.cpp | 23 | renderer_base.cpp |
| 23 | renderer_base.h | 24 | renderer_base.h |
| @@ -26,6 +27,8 @@ add_library(video_core STATIC | |||
| 26 | renderer_opengl/gl_rasterizer_cache.cpp | 27 | renderer_opengl/gl_rasterizer_cache.cpp |
| 27 | renderer_opengl/gl_rasterizer_cache.h | 28 | renderer_opengl/gl_rasterizer_cache.h |
| 28 | renderer_opengl/gl_resource_manager.h | 29 | renderer_opengl/gl_resource_manager.h |
| 30 | renderer_opengl/gl_shader_cache.cpp | ||
| 31 | renderer_opengl/gl_shader_cache.h | ||
| 29 | renderer_opengl/gl_shader_decompiler.cpp | 32 | renderer_opengl/gl_shader_decompiler.cpp |
| 30 | renderer_opengl/gl_shader_decompiler.h | 33 | renderer_opengl/gl_shader_decompiler.h |
| 31 | renderer_opengl/gl_shader_gen.cpp | 34 | renderer_opengl/gl_shader_gen.cpp |
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h new file mode 100644 index 000000000..7a0492a4e --- /dev/null +++ b/src/video_core/rasterizer_cache.h | |||
| @@ -0,0 +1,116 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <unordered_map> | ||
| 8 | #include <boost/icl/interval_map.hpp> | ||
| 9 | #include <boost/range/iterator_range.hpp> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "core/memory.h" | ||
| 13 | #include "video_core/memory_manager.h" | ||
| 14 | |||
| 15 | template <class T> | ||
| 16 | class RasterizerCache : NonCopyable { | ||
| 17 | public: | ||
| 18 | /// Mark the specified region as being invalidated | ||
| 19 | void InvalidateRegion(Tegra::GPUVAddr region_addr, size_t region_size) { | ||
| 20 | for (auto iter = cached_objects.cbegin(); iter != cached_objects.cend();) { | ||
| 21 | const auto& object{iter->second}; | ||
| 22 | |||
| 23 | ++iter; | ||
| 24 | |||
| 25 | if (object->GetAddr() <= (region_addr + region_size) && | ||
| 26 | region_addr <= (object->GetAddr() + object->GetSizeInBytes())) { | ||
| 27 | // Regions overlap, so invalidate | ||
| 28 | Unregister(object); | ||
| 29 | } | ||
| 30 | } | ||
| 31 | } | ||
| 32 | |||
| 33 | protected: | ||
| 34 | /// Tries to get an object from the cache with the specified address | ||
| 35 | T TryGet(Tegra::GPUVAddr addr) const { | ||
| 36 | const auto& search{cached_objects.find(addr)}; | ||
| 37 | if (search != cached_objects.end()) { | ||
| 38 | return search->second; | ||
| 39 | } | ||
| 40 | |||
| 41 | return nullptr; | ||
| 42 | } | ||
| 43 | |||
| 44 | /// Gets a reference to the cache | ||
| 45 | const std::unordered_map<Tegra::GPUVAddr, T>& GetCache() const { | ||
| 46 | return cached_objects; | ||
| 47 | } | ||
| 48 | |||
| 49 | /// Register an object into the cache | ||
| 50 | void Register(const T& object) { | ||
| 51 | const auto& search{cached_objects.find(object->GetAddr())}; | ||
| 52 | if (search != cached_objects.end()) { | ||
| 53 | // Registered already | ||
| 54 | return; | ||
| 55 | } | ||
| 56 | |||
| 57 | cached_objects[object->GetAddr()] = object; | ||
| 58 | UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1); | ||
| 59 | } | ||
| 60 | |||
| 61 | /// Unregisters an object from the cache | ||
| 62 | void Unregister(const T& object) { | ||
| 63 | const auto& search{cached_objects.find(object->GetAddr())}; | ||
| 64 | if (search == cached_objects.end()) { | ||
| 65 | // Unregistered already | ||
| 66 | return; | ||
| 67 | } | ||
| 68 | |||
| 69 | UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1); | ||
| 70 | cached_objects.erase(search); | ||
| 71 | } | ||
| 72 | |||
| 73 | private: | ||
| 74 | using PageMap = boost::icl::interval_map<u64, int>; | ||
| 75 | |||
| 76 | template <typename Map, typename Interval> | ||
| 77 | constexpr auto RangeFromInterval(Map& map, const Interval& interval) { | ||
| 78 | return boost::make_iterator_range(map.equal_range(interval)); | ||
| 79 | } | ||
| 80 | |||
| 81 | /// Increase/decrease the number of object in pages touching the specified region | ||
| 82 | void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { | ||
| 83 | const u64 page_start{addr >> Tegra::MemoryManager::PAGE_BITS}; | ||
| 84 | const u64 page_end{(addr + size) >> Tegra::MemoryManager::PAGE_BITS}; | ||
| 85 | |||
| 86 | // Interval maps will erase segments if count reaches 0, so if delta is negative we have to | ||
| 87 | // subtract after iterating | ||
| 88 | const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); | ||
| 89 | if (delta > 0) | ||
| 90 | cached_pages.add({pages_interval, delta}); | ||
| 91 | |||
| 92 | for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { | ||
| 93 | const auto interval = pair.first & pages_interval; | ||
| 94 | const int count = pair.second; | ||
| 95 | |||
| 96 | const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval) | ||
| 97 | << Tegra::MemoryManager::PAGE_BITS; | ||
| 98 | const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval) | ||
| 99 | << Tegra::MemoryManager::PAGE_BITS; | ||
| 100 | const u64 interval_size = interval_end_addr - interval_start_addr; | ||
| 101 | |||
| 102 | if (delta > 0 && count == delta) | ||
| 103 | Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); | ||
| 104 | else if (delta < 0 && count == -delta) | ||
| 105 | Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); | ||
| 106 | else | ||
| 107 | ASSERT(count >= 0); | ||
| 108 | } | ||
| 109 | |||
| 110 | if (delta < 0) | ||
| 111 | cached_pages.add({pages_interval, delta}); | ||
| 112 | } | ||
| 113 | |||
| 114 | std::unordered_map<Tegra::GPUVAddr, T> cached_objects; | ||
| 115 | PageMap cached_pages; | ||
| 116 | }; | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 96851ccb5..9951d8178 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -178,19 +178,6 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, | |||
| 178 | return {array_ptr, buffer_offset}; | 178 | return {array_ptr, buffer_offset}; |
| 179 | } | 179 | } |
| 180 | 180 | ||
| 181 | static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program) { | ||
| 182 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | ||
| 183 | |||
| 184 | // Fetch program code from memory | ||
| 185 | GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH); | ||
| 186 | auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)]; | ||
| 187 | const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; | ||
| 188 | const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)}; | ||
| 189 | Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64)); | ||
| 190 | |||
| 191 | return program_code; | ||
| 192 | } | ||
| 193 | |||
| 194 | std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { | 181 | std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { |
| 195 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 182 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 196 | 183 | ||
| @@ -224,31 +211,17 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr | |||
| 224 | buffer_ptr += sizeof(ubo); | 211 | buffer_ptr += sizeof(ubo); |
| 225 | buffer_offset += sizeof(ubo); | 212 | buffer_offset += sizeof(ubo); |
| 226 | 213 | ||
| 227 | GLShader::ShaderSetup setup{GetShaderProgramCode(program)}; | 214 | const Tegra::GPUVAddr addr{gpu.regs.code_address.CodeAddress() + shader_config.offset}; |
| 228 | GLShader::ShaderEntries shader_resources; | 215 | Shader shader{shader_cache.GetStageProgram(program)}; |
| 229 | 216 | ||
| 230 | switch (program) { | 217 | switch (program) { |
| 231 | case Maxwell::ShaderProgram::VertexA: { | 218 | case Maxwell::ShaderProgram::VertexA: |
| 232 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. | ||
| 233 | // Conventional HW does not support this, so we combine VertexA and VertexB into one | ||
| 234 | // stage here. | ||
| 235 | setup.SetProgramB(GetShaderProgramCode(Maxwell::ShaderProgram::VertexB)); | ||
| 236 | GLShader::MaxwellVSConfig vs_config{setup}; | ||
| 237 | shader_resources = | ||
| 238 | shader_program_manager->UseProgrammableVertexShader(vs_config, setup); | ||
| 239 | break; | ||
| 240 | } | ||
| 241 | |||
| 242 | case Maxwell::ShaderProgram::VertexB: { | 219 | case Maxwell::ShaderProgram::VertexB: { |
| 243 | GLShader::MaxwellVSConfig vs_config{setup}; | 220 | shader_program_manager->UseProgrammableVertexShader(shader->GetProgramHandle()); |
| 244 | shader_resources = | ||
| 245 | shader_program_manager->UseProgrammableVertexShader(vs_config, setup); | ||
| 246 | break; | 221 | break; |
| 247 | } | 222 | } |
| 248 | case Maxwell::ShaderProgram::Fragment: { | 223 | case Maxwell::ShaderProgram::Fragment: { |
| 249 | GLShader::MaxwellFSConfig fs_config{setup}; | 224 | shader_program_manager->UseProgrammableFragmentShader(shader->GetProgramHandle()); |
| 250 | shader_resources = | ||
| 251 | shader_program_manager->UseProgrammableFragmentShader(fs_config, setup); | ||
| 252 | break; | 225 | break; |
| 253 | } | 226 | } |
| 254 | default: | 227 | default: |
| @@ -257,18 +230,14 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr | |||
| 257 | UNREACHABLE(); | 230 | UNREACHABLE(); |
| 258 | } | 231 | } |
| 259 | 232 | ||
| 260 | GLuint gl_stage_program = shader_program_manager->GetCurrentProgramStage( | ||
| 261 | static_cast<Maxwell::ShaderStage>(stage)); | ||
| 262 | |||
| 263 | // Configure the const buffers for this shader stage. | 233 | // Configure the const buffers for this shader stage. |
| 264 | std::tie(buffer_ptr, buffer_offset, current_constbuffer_bindpoint) = SetupConstBuffers( | 234 | std::tie(buffer_ptr, buffer_offset, current_constbuffer_bindpoint) = |
| 265 | buffer_ptr, buffer_offset, static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, | 235 | SetupConstBuffers(buffer_ptr, buffer_offset, static_cast<Maxwell::ShaderStage>(stage), |
| 266 | current_constbuffer_bindpoint, shader_resources.const_buffer_entries); | 236 | shader, current_constbuffer_bindpoint); |
| 267 | 237 | ||
| 268 | // Configure the textures for this shader stage. | 238 | // Configure the textures for this shader stage. |
| 269 | current_texture_bindpoint = | 239 | current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader, |
| 270 | SetupTextures(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, | 240 | current_texture_bindpoint); |
| 271 | current_texture_bindpoint, shader_resources.texture_samplers); | ||
| 272 | 241 | ||
| 273 | // When VertexA is enabled, we have dual vertex shaders | 242 | // When VertexA is enabled, we have dual vertex shaders |
| 274 | if (program == Maxwell::ShaderProgram::VertexA) { | 243 | if (program == Maxwell::ShaderProgram::VertexA) { |
| @@ -571,23 +540,21 @@ void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {} | |||
| 571 | 540 | ||
| 572 | void RasterizerOpenGL::FlushAll() { | 541 | void RasterizerOpenGL::FlushAll() { |
| 573 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 542 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 574 | res_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); | ||
| 575 | } | 543 | } |
| 576 | 544 | ||
| 577 | void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { | 545 | void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { |
| 578 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 546 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 579 | res_cache.FlushRegion(addr, size); | ||
| 580 | } | 547 | } |
| 581 | 548 | ||
| 582 | void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { | 549 | void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { |
| 583 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 550 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 584 | res_cache.InvalidateRegion(addr, size); | 551 | res_cache.InvalidateRegion(addr, size); |
| 552 | shader_cache.InvalidateRegion(addr, size); | ||
| 585 | } | 553 | } |
| 586 | 554 | ||
| 587 | void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { | 555 | void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { |
| 588 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 556 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 589 | res_cache.FlushRegion(addr, size); | 557 | InvalidateRegion(addr, size); |
| 590 | res_cache.InvalidateRegion(addr, size); | ||
| 591 | } | 558 | } |
| 592 | 559 | ||
| 593 | bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { | 560 | bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { |
| @@ -672,15 +639,17 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr | |||
| 672 | } | 639 | } |
| 673 | } | 640 | } |
| 674 | 641 | ||
| 675 | std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers( | 642 | std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers(u8* buffer_ptr, |
| 676 | u8* buffer_ptr, GLintptr buffer_offset, Maxwell::ShaderStage stage, GLuint program, | 643 | GLintptr buffer_offset, |
| 677 | u32 current_bindpoint, const std::vector<GLShader::ConstBufferEntry>& entries) { | 644 | Maxwell::ShaderStage stage, |
| 645 | Shader& shader, | ||
| 646 | u32 current_bindpoint) { | ||
| 678 | const auto& gpu = Core::System::GetInstance().GPU(); | 647 | const auto& gpu = Core::System::GetInstance().GPU(); |
| 679 | const auto& maxwell3d = gpu.Maxwell3D(); | 648 | const auto& maxwell3d = gpu.Maxwell3D(); |
| 680 | |||
| 681 | // Upload only the enabled buffers from the 16 constbuffers of each shader stage | ||
| 682 | const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)]; | 649 | const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)]; |
| 650 | const auto& entries = shader->GetShaderEntries().const_buffer_entries; | ||
| 683 | 651 | ||
| 652 | // Upload only the enabled buffers from the 16 constbuffers of each shader stage | ||
| 684 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | 653 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { |
| 685 | const auto& used_buffer = entries[bindpoint]; | 654 | const auto& used_buffer = entries[bindpoint]; |
| 686 | const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; | 655 | const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; |
| @@ -719,12 +688,9 @@ std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers( | |||
| 719 | stream_buffer.GetHandle(), const_buffer_offset, size); | 688 | stream_buffer.GetHandle(), const_buffer_offset, size); |
| 720 | 689 | ||
| 721 | // Now configure the bindpoint of the buffer inside the shader | 690 | // Now configure the bindpoint of the buffer inside the shader |
| 722 | const std::string buffer_name = used_buffer.GetName(); | 691 | glUniformBlockBinding(shader->GetProgramHandle(), |
| 723 | const GLuint index = | 692 | shader->GetProgramResourceIndex(used_buffer.GetName()), |
| 724 | glGetProgramResourceIndex(program, GL_UNIFORM_BLOCK, buffer_name.c_str()); | 693 | current_bindpoint + bindpoint); |
| 725 | if (index != GL_INVALID_INDEX) { | ||
| 726 | glUniformBlockBinding(program, index, current_bindpoint + bindpoint); | ||
| 727 | } | ||
| 728 | } | 694 | } |
| 729 | 695 | ||
| 730 | state.Apply(); | 696 | state.Apply(); |
| @@ -732,10 +698,10 @@ std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers( | |||
| 732 | return {buffer_ptr, buffer_offset, current_bindpoint + static_cast<u32>(entries.size())}; | 698 | return {buffer_ptr, buffer_offset, current_bindpoint + static_cast<u32>(entries.size())}; |
| 733 | } | 699 | } |
| 734 | 700 | ||
| 735 | u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit, | 701 | u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, u32 current_unit) { |
| 736 | const std::vector<GLShader::SamplerEntry>& entries) { | ||
| 737 | const auto& gpu = Core::System::GetInstance().GPU(); | 702 | const auto& gpu = Core::System::GetInstance().GPU(); |
| 738 | const auto& maxwell3d = gpu.Maxwell3D(); | 703 | const auto& maxwell3d = gpu.Maxwell3D(); |
| 704 | const auto& entries = shader->GetShaderEntries().texture_samplers; | ||
| 739 | 705 | ||
| 740 | ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units), | 706 | ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units), |
| 741 | "Exceeded the number of active textures."); | 707 | "Exceeded the number of active textures."); |
| @@ -745,12 +711,9 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, | |||
| 745 | u32 current_bindpoint = current_unit + bindpoint; | 711 | u32 current_bindpoint = current_unit + bindpoint; |
| 746 | 712 | ||
| 747 | // Bind the uniform to the sampler. | 713 | // Bind the uniform to the sampler. |
| 748 | GLint uniform = glGetUniformLocation(program, entry.GetName().c_str()); | ||
| 749 | if (uniform == -1) { | ||
| 750 | continue; | ||
| 751 | } | ||
| 752 | 714 | ||
| 753 | glProgramUniform1i(program, uniform, current_bindpoint); | 715 | glProgramUniform1i(shader->GetProgramHandle(), shader->GetUniformLocation(entry.GetName()), |
| 716 | current_bindpoint); | ||
| 754 | 717 | ||
| 755 | const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset()); | 718 | const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset()); |
| 756 | 719 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 531b04046..7dd329efe 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include "video_core/rasterizer_interface.h" | 17 | #include "video_core/rasterizer_interface.h" |
| 18 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | 18 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" |
| 19 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 19 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 20 | #include "video_core/renderer_opengl/gl_shader_cache.h" | ||
| 20 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 21 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| 21 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 22 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 22 | #include "video_core/renderer_opengl/gl_state.h" | 23 | #include "video_core/renderer_opengl/gl_state.h" |
| @@ -99,26 +100,23 @@ private: | |||
| 99 | /* | 100 | /* |
| 100 | * Configures the current constbuffers to use for the draw command. | 101 | * Configures the current constbuffers to use for the draw command. |
| 101 | * @param stage The shader stage to configure buffers for. | 102 | * @param stage The shader stage to configure buffers for. |
| 102 | * @param program The OpenGL program object that contains the specified stage. | 103 | * @param shader The shader object that contains the specified stage. |
| 103 | * @param current_bindpoint The offset at which to start counting new buffer bindpoints. | 104 | * @param current_bindpoint The offset at which to start counting new buffer bindpoints. |
| 104 | * @param entries Vector describing the buffers that are actually used in the guest shader. | ||
| 105 | * @returns The next available bindpoint for use in the next shader stage. | 105 | * @returns The next available bindpoint for use in the next shader stage. |
| 106 | */ | 106 | */ |
| 107 | std::tuple<u8*, GLintptr, u32> SetupConstBuffers( | 107 | std::tuple<u8*, GLintptr, u32> SetupConstBuffers( |
| 108 | u8* buffer_ptr, GLintptr buffer_offset, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 108 | u8* buffer_ptr, GLintptr buffer_offset, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 109 | GLuint program, u32 current_bindpoint, | 109 | Shader& shader, u32 current_bindpoint); |
| 110 | const std::vector<GLShader::ConstBufferEntry>& entries); | ||
| 111 | 110 | ||
| 112 | /* | 111 | /* |
| 113 | * Configures the current textures to use for the draw command. | 112 | * Configures the current textures to use for the draw command. |
| 114 | * @param stage The shader stage to configure textures for. | 113 | * @param stage The shader stage to configure textures for. |
| 115 | * @param program The OpenGL program object that contains the specified stage. | 114 | * @param shader The shader object that contains the specified stage. |
| 116 | * @param current_unit The offset at which to start counting unused texture units. | 115 | * @param current_unit The offset at which to start counting unused texture units. |
| 117 | * @param entries Vector describing the textures that are actually used in the guest shader. | ||
| 118 | * @returns The next available bindpoint for use in the next shader stage. | 116 | * @returns The next available bindpoint for use in the next shader stage. |
| 119 | */ | 117 | */ |
| 120 | u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program, | 118 | u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader, |
| 121 | u32 current_unit, const std::vector<GLShader::SamplerEntry>& entries); | 119 | u32 current_unit); |
| 122 | 120 | ||
| 123 | /// Syncs the viewport to match the guest state | 121 | /// Syncs the viewport to match the guest state |
| 124 | void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect); | 122 | void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect); |
| @@ -157,6 +155,7 @@ private: | |||
| 157 | OpenGLState state; | 155 | OpenGLState state; |
| 158 | 156 | ||
| 159 | RasterizerCacheOpenGL res_cache; | 157 | RasterizerCacheOpenGL res_cache; |
| 158 | ShaderCacheOpenGL shader_cache; | ||
| 160 | 159 | ||
| 161 | Core::Frontend::EmuWindow& emu_window; | 160 | Core::Frontend::EmuWindow& emu_window; |
| 162 | 161 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 83d8d3d94..65305000c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -677,12 +677,6 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() { | |||
| 677 | draw_framebuffer.Create(); | 677 | draw_framebuffer.Create(); |
| 678 | } | 678 | } |
| 679 | 679 | ||
| 680 | RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { | ||
| 681 | while (!surface_cache.empty()) { | ||
| 682 | UnregisterSurface(surface_cache.begin()->second); | ||
| 683 | } | ||
| 684 | } | ||
| 685 | |||
| 686 | Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { | 680 | Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { |
| 687 | return GetSurface(SurfaceParams::CreateForTexture(config)); | 681 | return GetSurface(SurfaceParams::CreateForTexture(config)); |
| 688 | } | 682 | } |
| @@ -766,27 +760,25 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres | |||
| 766 | return {}; | 760 | return {}; |
| 767 | 761 | ||
| 768 | // Look up surface in the cache based on address | 762 | // Look up surface in the cache based on address |
| 769 | const auto& search{surface_cache.find(params.addr)}; | 763 | Surface surface{TryGet(params.addr)}; |
| 770 | Surface surface; | 764 | if (surface) { |
| 771 | if (search != surface_cache.end()) { | ||
| 772 | surface = search->second; | ||
| 773 | if (Settings::values.use_accurate_framebuffers) { | 765 | if (Settings::values.use_accurate_framebuffers) { |
| 774 | // If use_accurate_framebuffers is enabled, always load from memory | 766 | // If use_accurate_framebuffers is enabled, always load from memory |
| 775 | FlushSurface(surface); | 767 | FlushSurface(surface); |
| 776 | UnregisterSurface(surface); | 768 | Unregister(surface); |
| 777 | } else if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { | 769 | } else if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { |
| 778 | // Use the cached surface as-is | 770 | // Use the cached surface as-is |
| 779 | return surface; | 771 | return surface; |
| 780 | } else if (preserve_contents) { | 772 | } else if (preserve_contents) { |
| 781 | // If surface parameters changed and we care about keeping the previous data, recreate | 773 | // If surface parameters changed and we care about keeping the previous data, recreate |
| 782 | // the surface from the old one | 774 | // the surface from the old one |
| 783 | UnregisterSurface(surface); | 775 | Unregister(surface); |
| 784 | Surface new_surface{RecreateSurface(surface, params)}; | 776 | Surface new_surface{RecreateSurface(surface, params)}; |
| 785 | RegisterSurface(new_surface); | 777 | Register(new_surface); |
| 786 | return new_surface; | 778 | return new_surface; |
| 787 | } else { | 779 | } else { |
| 788 | // Delete the old surface before creating a new one to prevent collisions. | 780 | // Delete the old surface before creating a new one to prevent collisions. |
| 789 | UnregisterSurface(surface); | 781 | Unregister(surface); |
| 790 | } | 782 | } |
| 791 | } | 783 | } |
| 792 | 784 | ||
| @@ -797,7 +789,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres | |||
| 797 | if (!surface) { | 789 | if (!surface) { |
| 798 | surface = std::make_shared<CachedSurface>(params); | 790 | surface = std::make_shared<CachedSurface>(params); |
| 799 | ReserveSurface(surface); | 791 | ReserveSurface(surface); |
| 800 | RegisterSurface(surface); | 792 | Register(surface); |
| 801 | } | 793 | } |
| 802 | 794 | ||
| 803 | // Only load surface from memory if we care about the contents | 795 | // Only load surface from memory if we care about the contents |
| @@ -894,7 +886,7 @@ Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const { | |||
| 894 | // framebuffer overlaps surfaces. | 886 | // framebuffer overlaps surfaces. |
| 895 | 887 | ||
| 896 | std::vector<Surface> surfaces; | 888 | std::vector<Surface> surfaces; |
| 897 | for (const auto& surface : surface_cache) { | 889 | for (const auto& surface : GetCache()) { |
| 898 | const auto& params = surface.second->GetSurfaceParams(); | 890 | const auto& params = surface.second->GetSurfaceParams(); |
| 899 | const VAddr surface_cpu_addr = params.GetCpuAddr(); | 891 | const VAddr surface_cpu_addr = params.GetCpuAddr(); |
| 900 | if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + params.size_in_bytes)) { | 892 | if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + params.size_in_bytes)) { |
| @@ -912,51 +904,6 @@ Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const { | |||
| 912 | return surfaces[0]; | 904 | return surfaces[0]; |
| 913 | } | 905 | } |
| 914 | 906 | ||
| 915 | void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size*/) { | ||
| 916 | // TODO(bunnei): This is unused in the current implementation of the rasterizer cache. We should | ||
| 917 | // probably implement this in the future, but for now, the `use_accurate_framebufers` setting | ||
| 918 | // can be used to always flush. | ||
| 919 | } | ||
| 920 | |||
| 921 | void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) { | ||
| 922 | for (auto iter = surface_cache.cbegin(); iter != surface_cache.cend();) { | ||
| 923 | const auto& surface{iter->second}; | ||
| 924 | const auto& params{surface->GetSurfaceParams()}; | ||
| 925 | |||
| 926 | ++iter; | ||
| 927 | |||
| 928 | if (params.IsOverlappingRegion(addr, size)) { | ||
| 929 | UnregisterSurface(surface); | ||
| 930 | } | ||
| 931 | } | ||
| 932 | } | ||
| 933 | |||
| 934 | void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { | ||
| 935 | const auto& params{surface->GetSurfaceParams()}; | ||
| 936 | const auto& search{surface_cache.find(params.addr)}; | ||
| 937 | |||
| 938 | if (search != surface_cache.end()) { | ||
| 939 | // Registered already | ||
| 940 | return; | ||
| 941 | } | ||
| 942 | |||
| 943 | surface_cache[params.addr] = surface; | ||
| 944 | UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1); | ||
| 945 | } | ||
| 946 | |||
| 947 | void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { | ||
| 948 | const auto& params{surface->GetSurfaceParams()}; | ||
| 949 | const auto& search{surface_cache.find(params.addr)}; | ||
| 950 | |||
| 951 | if (search == surface_cache.end()) { | ||
| 952 | // Unregistered already | ||
| 953 | return; | ||
| 954 | } | ||
| 955 | |||
| 956 | UpdatePagesCachedCount(params.addr, params.size_in_bytes, -1); | ||
| 957 | surface_cache.erase(search); | ||
| 958 | } | ||
| 959 | |||
| 960 | void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { | 907 | void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { |
| 961 | const auto& surface_reserve_key{SurfaceReserveKey::Create(surface->GetSurfaceParams())}; | 908 | const auto& surface_reserve_key{SurfaceReserveKey::Create(surface->GetSurfaceParams())}; |
| 962 | surface_reserve[surface_reserve_key] = surface; | 909 | surface_reserve[surface_reserve_key] = surface; |
| @@ -966,49 +913,10 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params | |||
| 966 | const auto& surface_reserve_key{SurfaceReserveKey::Create(params)}; | 913 | const auto& surface_reserve_key{SurfaceReserveKey::Create(params)}; |
| 967 | auto search{surface_reserve.find(surface_reserve_key)}; | 914 | auto search{surface_reserve.find(surface_reserve_key)}; |
| 968 | if (search != surface_reserve.end()) { | 915 | if (search != surface_reserve.end()) { |
| 969 | RegisterSurface(search->second); | 916 | Register(search->second); |
| 970 | return search->second; | 917 | return search->second; |
| 971 | } | 918 | } |
| 972 | return {}; | 919 | return {}; |
| 973 | } | 920 | } |
| 974 | 921 | ||
| 975 | template <typename Map, typename Interval> | ||
| 976 | constexpr auto RangeFromInterval(Map& map, const Interval& interval) { | ||
| 977 | return boost::make_iterator_range(map.equal_range(interval)); | ||
| 978 | } | ||
| 979 | |||
| 980 | void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { | ||
| 981 | const u64 num_pages = ((addr + size - 1) >> Tegra::MemoryManager::PAGE_BITS) - | ||
| 982 | (addr >> Tegra::MemoryManager::PAGE_BITS) + 1; | ||
| 983 | const u64 page_start = addr >> Tegra::MemoryManager::PAGE_BITS; | ||
| 984 | const u64 page_end = page_start + num_pages; | ||
| 985 | |||
| 986 | // Interval maps will erase segments if count reaches 0, so if delta is negative we have to | ||
| 987 | // subtract after iterating | ||
| 988 | const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); | ||
| 989 | if (delta > 0) | ||
| 990 | cached_pages.add({pages_interval, delta}); | ||
| 991 | |||
| 992 | for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { | ||
| 993 | const auto interval = pair.first & pages_interval; | ||
| 994 | const int count = pair.second; | ||
| 995 | |||
| 996 | const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval) | ||
| 997 | << Tegra::MemoryManager::PAGE_BITS; | ||
| 998 | const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval) | ||
| 999 | << Tegra::MemoryManager::PAGE_BITS; | ||
| 1000 | const u64 interval_size = interval_end_addr - interval_start_addr; | ||
| 1001 | |||
| 1002 | if (delta > 0 && count == delta) | ||
| 1003 | Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); | ||
| 1004 | else if (delta < 0 && count == -delta) | ||
| 1005 | Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); | ||
| 1006 | else | ||
| 1007 | ASSERT(count >= 0); | ||
| 1008 | } | ||
| 1009 | |||
| 1010 | if (delta < 0) | ||
| 1011 | cached_pages.add({pages_interval, delta}); | ||
| 1012 | } | ||
| 1013 | |||
| 1014 | } // namespace OpenGL | 922 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index c8c615df2..8a6ca2a4b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -8,12 +8,12 @@ | |||
| 8 | #include <map> | 8 | #include <map> |
| 9 | #include <memory> | 9 | #include <memory> |
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | #include <boost/icl/interval_map.hpp> | ||
| 12 | 11 | ||
| 13 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 14 | #include "common/hash.h" | 13 | #include "common/hash.h" |
| 15 | #include "common/math_util.h" | 14 | #include "common/math_util.h" |
| 16 | #include "video_core/engines/maxwell_3d.h" | 15 | #include "video_core/engines/maxwell_3d.h" |
| 16 | #include "video_core/rasterizer_cache.h" | ||
| 17 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 17 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 18 | #include "video_core/textures/texture.h" | 18 | #include "video_core/textures/texture.h" |
| 19 | 19 | ||
| @@ -22,7 +22,6 @@ namespace OpenGL { | |||
| 22 | class CachedSurface; | 22 | class CachedSurface; |
| 23 | using Surface = std::shared_ptr<CachedSurface>; | 23 | using Surface = std::shared_ptr<CachedSurface>; |
| 24 | using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; | 24 | using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; |
| 25 | using PageMap = boost::icl::interval_map<u64, int>; | ||
| 26 | 25 | ||
| 27 | struct SurfaceParams { | 26 | struct SurfaceParams { |
| 28 | enum class PixelFormat { | 27 | enum class PixelFormat { |
| @@ -632,11 +631,6 @@ struct SurfaceParams { | |||
| 632 | /// Returns the CPU virtual address for this surface | 631 | /// Returns the CPU virtual address for this surface |
| 633 | VAddr GetCpuAddr() const; | 632 | VAddr GetCpuAddr() const; |
| 634 | 633 | ||
| 635 | /// Returns true if the specified region overlaps with this surface's region in Switch memory | ||
| 636 | bool IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const { | ||
| 637 | return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes); | ||
| 638 | } | ||
| 639 | |||
| 640 | /// Creates SurfaceParams from a texture configuration | 634 | /// Creates SurfaceParams from a texture configuration |
| 641 | static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); | 635 | static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); |
| 642 | 636 | ||
| @@ -708,6 +702,14 @@ class CachedSurface final { | |||
| 708 | public: | 702 | public: |
| 709 | CachedSurface(const SurfaceParams& params); | 703 | CachedSurface(const SurfaceParams& params); |
| 710 | 704 | ||
| 705 | Tegra::GPUVAddr GetAddr() const { | ||
| 706 | return params.addr; | ||
| 707 | } | ||
| 708 | |||
| 709 | size_t GetSizeInBytes() const { | ||
| 710 | return params.size_in_bytes; | ||
| 711 | } | ||
| 712 | |||
| 711 | const OGLTexture& Texture() const { | 713 | const OGLTexture& Texture() const { |
| 712 | return texture; | 714 | return texture; |
| 713 | } | 715 | } |
| @@ -737,10 +739,9 @@ private: | |||
| 737 | SurfaceParams params; | 739 | SurfaceParams params; |
| 738 | }; | 740 | }; |
| 739 | 741 | ||
| 740 | class RasterizerCacheOpenGL final : NonCopyable { | 742 | class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { |
| 741 | public: | 743 | public: |
| 742 | RasterizerCacheOpenGL(); | 744 | RasterizerCacheOpenGL(); |
| 743 | ~RasterizerCacheOpenGL(); | ||
| 744 | 745 | ||
| 745 | /// Get a surface based on the texture configuration | 746 | /// Get a surface based on the texture configuration |
| 746 | Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); | 747 | Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); |
| @@ -755,12 +756,6 @@ public: | |||
| 755 | /// Tries to find a framebuffer GPU address based on the provided CPU address | 756 | /// Tries to find a framebuffer GPU address based on the provided CPU address |
| 756 | Surface TryFindFramebufferSurface(VAddr cpu_addr) const; | 757 | Surface TryFindFramebufferSurface(VAddr cpu_addr) const; |
| 757 | 758 | ||
| 758 | /// Write any cached resources overlapping the region back to memory (if dirty) | ||
| 759 | void FlushRegion(Tegra::GPUVAddr addr, size_t size); | ||
| 760 | |||
| 761 | /// Mark the specified region as being invalidated | ||
| 762 | void InvalidateRegion(Tegra::GPUVAddr addr, size_t size); | ||
| 763 | |||
| 764 | private: | 759 | private: |
| 765 | void LoadSurface(const Surface& surface); | 760 | void LoadSurface(const Surface& surface); |
| 766 | Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); | 761 | Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); |
| @@ -768,24 +763,12 @@ private: | |||
| 768 | /// Recreates a surface with new parameters | 763 | /// Recreates a surface with new parameters |
| 769 | Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params); | 764 | Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params); |
| 770 | 765 | ||
| 771 | /// Register surface into the cache | ||
| 772 | void RegisterSurface(const Surface& surface); | ||
| 773 | |||
| 774 | /// Remove surface from the cache | ||
| 775 | void UnregisterSurface(const Surface& surface); | ||
| 776 | |||
| 777 | /// Reserves a unique surface that can be reused later | 766 | /// Reserves a unique surface that can be reused later |
| 778 | void ReserveSurface(const Surface& surface); | 767 | void ReserveSurface(const Surface& surface); |
| 779 | 768 | ||
| 780 | /// Tries to get a reserved surface for the specified parameters | 769 | /// Tries to get a reserved surface for the specified parameters |
| 781 | Surface TryGetReservedSurface(const SurfaceParams& params); | 770 | Surface TryGetReservedSurface(const SurfaceParams& params); |
| 782 | 771 | ||
| 783 | /// Increase/decrease the number of surface in pages touching the specified region | ||
| 784 | void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); | ||
| 785 | |||
| 786 | std::unordered_map<Tegra::GPUVAddr, Surface> surface_cache; | ||
| 787 | PageMap cached_pages; | ||
| 788 | |||
| 789 | /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have | 772 | /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have |
| 790 | /// previously been used. This is to prevent surfaces from being constantly created and | 773 | /// previously been used. This is to prevent surfaces from being constantly created and |
| 791 | /// destroyed when used with different surface parameters. | 774 | /// destroyed when used with different surface parameters. |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp new file mode 100644 index 000000000..3c3d1d35e --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -0,0 +1,131 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "core/core.h" | ||
| 7 | #include "core/memory.h" | ||
| 8 | #include "video_core/engines/maxwell_3d.h" | ||
| 9 | #include "video_core/renderer_opengl/gl_shader_cache.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||
| 11 | |||
| 12 | namespace OpenGL { | ||
| 13 | |||
| 14 | /// Gets the address for the specified shader stage program | ||
| 15 | static Tegra::GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) { | ||
| 16 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | ||
| 17 | |||
| 18 | GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH); | ||
| 19 | auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)]; | ||
| 20 | return gpu.regs.code_address.CodeAddress() + shader_config.offset; | ||
| 21 | } | ||
| 22 | |||
| 23 | /// Gets the shader program code from memory for the specified address | ||
| 24 | static GLShader::ProgramCode GetShaderCode(Tegra::GPUVAddr addr) { | ||
| 25 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | ||
| 26 | |||
| 27 | GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH); | ||
| 28 | const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(addr)}; | ||
| 29 | Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64)); | ||
| 30 | |||
| 31 | return program_code; | ||
| 32 | } | ||
| 33 | |||
| 34 | /// Helper function to set shader uniform block bindings for a single shader stage | ||
| 35 | static void SetShaderUniformBlockBinding(GLuint shader, const char* name, | ||
| 36 | Maxwell::ShaderStage binding, size_t expected_size) { | ||
| 37 | const GLuint ub_index = glGetUniformBlockIndex(shader, name); | ||
| 38 | if (ub_index == GL_INVALID_INDEX) { | ||
| 39 | return; | ||
| 40 | } | ||
| 41 | |||
| 42 | GLint ub_size = 0; | ||
| 43 | glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); | ||
| 44 | ASSERT_MSG(static_cast<size_t>(ub_size) == expected_size, | ||
| 45 | "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size); | ||
| 46 | glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); | ||
| 47 | } | ||
| 48 | |||
| 49 | /// Sets shader uniform block bindings for an entire shader program | ||
| 50 | static void SetShaderUniformBlockBindings(GLuint shader) { | ||
| 51 | SetShaderUniformBlockBinding(shader, "vs_config", Maxwell::ShaderStage::Vertex, | ||
| 52 | sizeof(GLShader::MaxwellUniformData)); | ||
| 53 | SetShaderUniformBlockBinding(shader, "gs_config", Maxwell::ShaderStage::Geometry, | ||
| 54 | sizeof(GLShader::MaxwellUniformData)); | ||
| 55 | SetShaderUniformBlockBinding(shader, "fs_config", Maxwell::ShaderStage::Fragment, | ||
| 56 | sizeof(GLShader::MaxwellUniformData)); | ||
| 57 | } | ||
| 58 | |||
| 59 | CachedShader::CachedShader(Tegra::GPUVAddr addr, Maxwell::ShaderProgram program_type) | ||
| 60 | : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} { | ||
| 61 | |||
| 62 | GLShader::ProgramResult program_result; | ||
| 63 | GLenum gl_type{}; | ||
| 64 | |||
| 65 | switch (program_type) { | ||
| 66 | case Maxwell::ShaderProgram::VertexA: | ||
| 67 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. | ||
| 68 | // Conventional HW does not support this, so we combine VertexA and VertexB into one | ||
| 69 | // stage here. | ||
| 70 | setup.SetProgramB(GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); | ||
| 71 | case Maxwell::ShaderProgram::VertexB: | ||
| 72 | program_result = GLShader::GenerateVertexShader(setup); | ||
| 73 | gl_type = GL_VERTEX_SHADER; | ||
| 74 | break; | ||
| 75 | case Maxwell::ShaderProgram::Fragment: | ||
| 76 | program_result = GLShader::GenerateFragmentShader(setup); | ||
| 77 | gl_type = GL_FRAGMENT_SHADER; | ||
| 78 | break; | ||
| 79 | default: | ||
| 80 | LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); | ||
| 81 | UNREACHABLE(); | ||
| 82 | return; | ||
| 83 | } | ||
| 84 | |||
| 85 | entries = program_result.second; | ||
| 86 | |||
| 87 | OGLShader shader; | ||
| 88 | shader.Create(program_result.first.c_str(), gl_type); | ||
| 89 | program.Create(true, shader.handle); | ||
| 90 | SetShaderUniformBlockBindings(program.handle); | ||
| 91 | } | ||
| 92 | |||
| 93 | GLuint CachedShader::GetProgramResourceIndex(const std::string& name) { | ||
| 94 | auto search{resource_cache.find(name)}; | ||
| 95 | if (search == resource_cache.end()) { | ||
| 96 | const GLuint index{ | ||
| 97 | glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, name.c_str())}; | ||
| 98 | resource_cache[name] = index; | ||
| 99 | return index; | ||
| 100 | } | ||
| 101 | |||
| 102 | return search->second; | ||
| 103 | } | ||
| 104 | |||
| 105 | GLint CachedShader::GetUniformLocation(const std::string& name) { | ||
| 106 | auto search{uniform_cache.find(name)}; | ||
| 107 | if (search == uniform_cache.end()) { | ||
| 108 | const GLint index{glGetUniformLocation(program.handle, name.c_str())}; | ||
| 109 | uniform_cache[name] = index; | ||
| 110 | return index; | ||
| 111 | } | ||
| 112 | |||
| 113 | return search->second; | ||
| 114 | } | ||
| 115 | |||
| 116 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | ||
| 117 | const Tegra::GPUVAddr program_addr{GetShaderAddress(program)}; | ||
| 118 | |||
| 119 | // Look up shader in the cache based on address | ||
| 120 | Shader shader{TryGet(program_addr)}; | ||
| 121 | |||
| 122 | if (!shader) { | ||
| 123 | // No shader found - create a new one | ||
| 124 | shader = std::make_shared<CachedShader>(program_addr, program); | ||
| 125 | Register(shader); | ||
| 126 | } | ||
| 127 | |||
| 128 | return shader; | ||
| 129 | } | ||
| 130 | |||
| 131 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h new file mode 100644 index 000000000..44156dcab --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -0,0 +1,69 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <unordered_map> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/memory_manager.h" | ||
| 12 | #include "video_core/rasterizer_cache.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 14 | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||
| 15 | |||
| 16 | namespace OpenGL { | ||
| 17 | |||
| 18 | class CachedShader; | ||
| 19 | using Shader = std::shared_ptr<CachedShader>; | ||
| 20 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 21 | |||
| 22 | class CachedShader final { | ||
| 23 | public: | ||
| 24 | CachedShader(Tegra::GPUVAddr addr, Maxwell::ShaderProgram program_type); | ||
| 25 | |||
| 26 | /// Gets the address of the shader in guest memory, required for cache management | ||
| 27 | Tegra::GPUVAddr GetAddr() const { | ||
| 28 | return addr; | ||
| 29 | } | ||
| 30 | |||
| 31 | /// Gets the size of the shader in guest memory, required for cache management | ||
| 32 | size_t GetSizeInBytes() const { | ||
| 33 | return sizeof(GLShader::ProgramCode); | ||
| 34 | } | ||
| 35 | |||
| 36 | /// Gets the shader entries for the shader | ||
| 37 | const GLShader::ShaderEntries& GetShaderEntries() const { | ||
| 38 | return entries; | ||
| 39 | } | ||
| 40 | |||
| 41 | /// Gets the GL program handle for the shader | ||
| 42 | GLuint GetProgramHandle() const { | ||
| 43 | return program.handle; | ||
| 44 | } | ||
| 45 | |||
| 46 | /// Gets the GL program resource location for the specified resource, caching as needed | ||
| 47 | GLuint GetProgramResourceIndex(const std::string& name); | ||
| 48 | |||
| 49 | /// Gets the GL uniform location for the specified resource, caching as needed | ||
| 50 | GLint GetUniformLocation(const std::string& name); | ||
| 51 | |||
| 52 | private: | ||
| 53 | Tegra::GPUVAddr addr; | ||
| 54 | Maxwell::ShaderProgram program_type; | ||
| 55 | GLShader::ShaderSetup setup; | ||
| 56 | GLShader::ShaderEntries entries; | ||
| 57 | OGLProgram program; | ||
| 58 | |||
| 59 | std::unordered_map<std::string, GLuint> resource_cache; | ||
| 60 | std::unordered_map<std::string, GLint> uniform_cache; | ||
| 61 | }; | ||
| 62 | |||
| 63 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { | ||
| 64 | public: | ||
| 65 | /// Gets the current specified shader stage program | ||
| 66 | Shader GetStageProgram(Maxwell::ShaderProgram program); | ||
| 67 | }; | ||
| 68 | |||
| 69 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 0677317bc..6ca05945e 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -13,7 +13,7 @@ using Tegra::Engines::Maxwell3D; | |||
| 13 | 13 | ||
| 14 | static constexpr u32 PROGRAM_OFFSET{10}; | 14 | static constexpr u32 PROGRAM_OFFSET{10}; |
| 15 | 15 | ||
| 16 | ProgramResult GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config) { | 16 | ProgramResult GenerateVertexShader(const ShaderSetup& setup) { |
| 17 | std::string out = "#version 430 core\n"; | 17 | std::string out = "#version 430 core\n"; |
| 18 | out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; | 18 | out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; |
| 19 | out += Decompiler::GetCommonDeclarations(); | 19 | out += Decompiler::GetCommonDeclarations(); |
| @@ -75,7 +75,7 @@ void main() { | |||
| 75 | return {out, program.second}; | 75 | return {out, program.second}; |
| 76 | } | 76 | } |
| 77 | 77 | ||
| 78 | ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config) { | 78 | ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { |
| 79 | std::string out = "#version 430 core\n"; | 79 | std::string out = "#version 430 core\n"; |
| 80 | out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; | 80 | out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; |
| 81 | out += Decompiler::GetCommonDeclarations(); | 81 | out += Decompiler::GetCommonDeclarations(); |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 4e5a6f130..c788099d4 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -6,12 +6,9 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <string> | 8 | #include <string> |
| 9 | #include <type_traits> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | 9 | #include <vector> |
| 12 | #include <boost/functional/hash.hpp> | 10 | |
| 13 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 14 | #include "common/hash.h" | ||
| 15 | 12 | ||
| 16 | namespace OpenGL::GLShader { | 13 | namespace OpenGL::GLShader { |
| 17 | 14 | ||
| @@ -124,18 +121,8 @@ struct ShaderSetup { | |||
| 124 | ProgramCode code_b; // Used for dual vertex shaders | 121 | ProgramCode code_b; // Used for dual vertex shaders |
| 125 | } program; | 122 | } program; |
| 126 | 123 | ||
| 127 | bool program_code_hash_dirty = true; | ||
| 128 | |||
| 129 | u64 GetProgramCodeHash() { | ||
| 130 | if (program_code_hash_dirty) { | ||
| 131 | program_code_hash = GetNewHash(); | ||
| 132 | program_code_hash_dirty = false; | ||
| 133 | } | ||
| 134 | return program_code_hash; | ||
| 135 | } | ||
| 136 | |||
| 137 | /// Used in scenarios where we have a dual vertex shaders | 124 | /// Used in scenarios where we have a dual vertex shaders |
| 138 | void SetProgramB(ProgramCode program_b) { | 125 | void SetProgramB(ProgramCode&& program_b) { |
| 139 | program.code_b = std::move(program_b); | 126 | program.code_b = std::move(program_b); |
| 140 | has_program_b = true; | 127 | has_program_b = true; |
| 141 | } | 128 | } |
| @@ -145,73 +132,19 @@ struct ShaderSetup { | |||
| 145 | } | 132 | } |
| 146 | 133 | ||
| 147 | private: | 134 | private: |
| 148 | u64 GetNewHash() const { | ||
| 149 | size_t hash = 0; | ||
| 150 | |||
| 151 | const u64 hash_a = Common::ComputeHash64(program.code.data(), program.code.size()); | ||
| 152 | boost::hash_combine(hash, hash_a); | ||
| 153 | |||
| 154 | if (has_program_b) { | ||
| 155 | // Compute hash over dual shader programs | ||
| 156 | const u64 hash_b = Common::ComputeHash64(program.code_b.data(), program.code_b.size()); | ||
| 157 | boost::hash_combine(hash, hash_b); | ||
| 158 | } | ||
| 159 | |||
| 160 | return hash; | ||
| 161 | } | ||
| 162 | |||
| 163 | u64 program_code_hash{}; | ||
| 164 | bool has_program_b{}; | 135 | bool has_program_b{}; |
| 165 | }; | 136 | }; |
| 166 | 137 | ||
| 167 | struct MaxwellShaderConfigCommon { | ||
| 168 | void Init(ShaderSetup& setup) { | ||
| 169 | program_hash = setup.GetProgramCodeHash(); | ||
| 170 | } | ||
| 171 | |||
| 172 | u64 program_hash; | ||
| 173 | }; | ||
| 174 | |||
| 175 | struct MaxwellVSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> { | ||
| 176 | explicit MaxwellVSConfig(ShaderSetup& setup) { | ||
| 177 | state.Init(setup); | ||
| 178 | } | ||
| 179 | }; | ||
| 180 | |||
| 181 | struct MaxwellFSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> { | ||
| 182 | explicit MaxwellFSConfig(ShaderSetup& setup) { | ||
| 183 | state.Init(setup); | ||
| 184 | } | ||
| 185 | }; | ||
| 186 | |||
| 187 | /** | 138 | /** |
| 188 | * Generates the GLSL vertex shader program source code for the given VS program | 139 | * Generates the GLSL vertex shader program source code for the given VS program |
| 189 | * @returns String of the shader source code | 140 | * @returns String of the shader source code |
| 190 | */ | 141 | */ |
| 191 | ProgramResult GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config); | 142 | ProgramResult GenerateVertexShader(const ShaderSetup& setup); |
| 192 | 143 | ||
| 193 | /** | 144 | /** |
| 194 | * Generates the GLSL fragment shader program source code for the given FS program | 145 | * Generates the GLSL fragment shader program source code for the given FS program |
| 195 | * @returns String of the shader source code | 146 | * @returns String of the shader source code |
| 196 | */ | 147 | */ |
| 197 | ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config); | 148 | ProgramResult GenerateFragmentShader(const ShaderSetup& setup); |
| 198 | 149 | ||
| 199 | } // namespace OpenGL::GLShader | 150 | } // namespace OpenGL::GLShader |
| 200 | |||
| 201 | namespace std { | ||
| 202 | |||
| 203 | template <> | ||
| 204 | struct hash<OpenGL::GLShader::MaxwellVSConfig> { | ||
| 205 | size_t operator()(const OpenGL::GLShader::MaxwellVSConfig& k) const { | ||
| 206 | return k.Hash(); | ||
| 207 | } | ||
| 208 | }; | ||
| 209 | |||
| 210 | template <> | ||
| 211 | struct hash<OpenGL::GLShader::MaxwellFSConfig> { | ||
| 212 | size_t operator()(const OpenGL::GLShader::MaxwellFSConfig& k) const { | ||
| 213 | return k.Hash(); | ||
| 214 | } | ||
| 215 | }; | ||
| 216 | |||
| 217 | } // namespace std | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 8960afef5..022d32a86 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp | |||
| @@ -3,39 +3,10 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "core/core.h" | 5 | #include "core/core.h" |
| 6 | #include "core/hle/kernel/process.h" | ||
| 7 | #include "video_core/engines/maxwell_3d.h" | ||
| 8 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 6 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 9 | 7 | ||
| 10 | namespace OpenGL::GLShader { | 8 | namespace OpenGL::GLShader { |
| 11 | 9 | ||
| 12 | namespace Impl { | ||
| 13 | static void SetShaderUniformBlockBinding(GLuint shader, const char* name, | ||
| 14 | Maxwell3D::Regs::ShaderStage binding, | ||
| 15 | size_t expected_size) { | ||
| 16 | const GLuint ub_index = glGetUniformBlockIndex(shader, name); | ||
| 17 | if (ub_index == GL_INVALID_INDEX) { | ||
| 18 | return; | ||
| 19 | } | ||
| 20 | |||
| 21 | GLint ub_size = 0; | ||
| 22 | glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); | ||
| 23 | ASSERT_MSG(static_cast<size_t>(ub_size) == expected_size, | ||
| 24 | "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size); | ||
| 25 | glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); | ||
| 26 | } | ||
| 27 | |||
| 28 | void SetShaderUniformBlockBindings(GLuint shader) { | ||
| 29 | SetShaderUniformBlockBinding(shader, "vs_config", Maxwell3D::Regs::ShaderStage::Vertex, | ||
| 30 | sizeof(MaxwellUniformData)); | ||
| 31 | SetShaderUniformBlockBinding(shader, "gs_config", Maxwell3D::Regs::ShaderStage::Geometry, | ||
| 32 | sizeof(MaxwellUniformData)); | ||
| 33 | SetShaderUniformBlockBinding(shader, "fs_config", Maxwell3D::Regs::ShaderStage::Fragment, | ||
| 34 | sizeof(MaxwellUniformData)); | ||
| 35 | } | ||
| 36 | |||
| 37 | } // namespace Impl | ||
| 38 | |||
| 39 | void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { | 10 | void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { |
| 40 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 11 | const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 41 | const auto& regs = gpu.regs; | 12 | const auto& regs = gpu.regs; |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 0e7085776..533e42caa 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h | |||
| @@ -4,12 +4,9 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <tuple> | ||
| 8 | #include <unordered_map> | ||
| 9 | #include <boost/functional/hash.hpp> | ||
| 10 | #include <glad/glad.h> | 7 | #include <glad/glad.h> |
| 8 | |||
| 11 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 9 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 12 | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||
| 13 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | 10 | #include "video_core/renderer_opengl/maxwell_to_gl.h" |
| 14 | 11 | ||
| 15 | namespace OpenGL::GLShader { | 12 | namespace OpenGL::GLShader { |
| @@ -19,10 +16,6 @@ static constexpr size_t NumTextureSamplers = 32; | |||
| 19 | 16 | ||
| 20 | using Tegra::Engines::Maxwell3D; | 17 | using Tegra::Engines::Maxwell3D; |
| 21 | 18 | ||
| 22 | namespace Impl { | ||
| 23 | void SetShaderUniformBlockBindings(GLuint shader); | ||
| 24 | } // namespace Impl | ||
| 25 | |||
| 26 | /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned | 19 | /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned |
| 27 | // NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at | 20 | // NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at |
| 28 | // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. | 21 | // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. |
| @@ -36,102 +29,22 @@ static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure si | |||
| 36 | static_assert(sizeof(MaxwellUniformData) < 16384, | 29 | static_assert(sizeof(MaxwellUniformData) < 16384, |
| 37 | "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); | 30 | "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); |
| 38 | 31 | ||
| 39 | class OGLShaderStage { | ||
| 40 | public: | ||
| 41 | OGLShaderStage() = default; | ||
| 42 | |||
| 43 | void Create(const ProgramResult& program_result, GLenum type) { | ||
| 44 | OGLShader shader; | ||
| 45 | shader.Create(program_result.first.c_str(), type); | ||
| 46 | program.Create(true, shader.handle); | ||
| 47 | Impl::SetShaderUniformBlockBindings(program.handle); | ||
| 48 | entries = program_result.second; | ||
| 49 | } | ||
| 50 | GLuint GetHandle() const { | ||
| 51 | return program.handle; | ||
| 52 | } | ||
| 53 | |||
| 54 | ShaderEntries GetEntries() const { | ||
| 55 | return entries; | ||
| 56 | } | ||
| 57 | |||
| 58 | private: | ||
| 59 | OGLProgram program; | ||
| 60 | ShaderEntries entries; | ||
| 61 | }; | ||
| 62 | |||
| 63 | // TODO(wwylele): beautify this doc | ||
| 64 | // This is a shader cache designed for translating PICA shader to GLSL shader. | ||
| 65 | // The double cache is needed because diffent KeyConfigType, which includes a hash of the code | ||
| 66 | // region (including its leftover unused code) can generate the same GLSL code. | ||
| 67 | template <typename KeyConfigType, | ||
| 68 | ProgramResult (*CodeGenerator)(const ShaderSetup&, const KeyConfigType&), | ||
| 69 | GLenum ShaderType> | ||
| 70 | class ShaderCache { | ||
| 71 | public: | ||
| 72 | ShaderCache() = default; | ||
| 73 | |||
| 74 | using Result = std::pair<GLuint, ShaderEntries>; | ||
| 75 | |||
| 76 | Result Get(const KeyConfigType& key, const ShaderSetup& setup) { | ||
| 77 | auto map_it = shader_map.find(key); | ||
| 78 | if (map_it == shader_map.end()) { | ||
| 79 | ProgramResult program = CodeGenerator(setup, key); | ||
| 80 | |||
| 81 | auto [iter, new_shader] = shader_cache.emplace(program.first, OGLShaderStage{}); | ||
| 82 | OGLShaderStage& cached_shader = iter->second; | ||
| 83 | if (new_shader) { | ||
| 84 | cached_shader.Create(program, ShaderType); | ||
| 85 | } | ||
| 86 | shader_map[key] = &cached_shader; | ||
| 87 | return {cached_shader.GetHandle(), program.second}; | ||
| 88 | } else { | ||
| 89 | return {map_it->second->GetHandle(), map_it->second->GetEntries()}; | ||
| 90 | } | ||
| 91 | } | ||
| 92 | |||
| 93 | private: | ||
| 94 | std::unordered_map<KeyConfigType, OGLShaderStage*> shader_map; | ||
| 95 | std::unordered_map<std::string, OGLShaderStage> shader_cache; | ||
| 96 | }; | ||
| 97 | |||
| 98 | using VertexShaders = ShaderCache<MaxwellVSConfig, &GenerateVertexShader, GL_VERTEX_SHADER>; | ||
| 99 | |||
| 100 | using FragmentShaders = ShaderCache<MaxwellFSConfig, &GenerateFragmentShader, GL_FRAGMENT_SHADER>; | ||
| 101 | |||
| 102 | class ProgramManager { | 32 | class ProgramManager { |
| 103 | public: | 33 | public: |
| 104 | ProgramManager() { | 34 | ProgramManager() { |
| 105 | pipeline.Create(); | 35 | pipeline.Create(); |
| 106 | } | 36 | } |
| 107 | 37 | ||
| 108 | ShaderEntries UseProgrammableVertexShader(const MaxwellVSConfig& config, | 38 | void UseProgrammableVertexShader(GLuint program) { |
| 109 | const ShaderSetup& setup) { | 39 | vs = program; |
| 110 | ShaderEntries result; | ||
| 111 | std::tie(current.vs, result) = vertex_shaders.Get(config, setup); | ||
| 112 | return result; | ||
| 113 | } | ||
| 114 | |||
| 115 | ShaderEntries UseProgrammableFragmentShader(const MaxwellFSConfig& config, | ||
| 116 | const ShaderSetup& setup) { | ||
| 117 | ShaderEntries result; | ||
| 118 | std::tie(current.fs, result) = fragment_shaders.Get(config, setup); | ||
| 119 | return result; | ||
| 120 | } | 40 | } |
| 121 | 41 | ||
| 122 | GLuint GetCurrentProgramStage(Maxwell3D::Regs::ShaderStage stage) const { | 42 | void UseProgrammableFragmentShader(GLuint program) { |
| 123 | switch (stage) { | 43 | fs = program; |
| 124 | case Maxwell3D::Regs::ShaderStage::Vertex: | ||
| 125 | return current.vs; | ||
| 126 | case Maxwell3D::Regs::ShaderStage::Fragment: | ||
| 127 | return current.fs; | ||
| 128 | } | ||
| 129 | |||
| 130 | UNREACHABLE(); | ||
| 131 | } | 44 | } |
| 132 | 45 | ||
| 133 | void UseTrivialGeometryShader() { | 46 | void UseTrivialGeometryShader() { |
| 134 | current.gs = 0; | 47 | gs = 0; |
| 135 | } | 48 | } |
| 136 | 49 | ||
| 137 | void ApplyTo(OpenGLState& state) { | 50 | void ApplyTo(OpenGLState& state) { |
| @@ -140,35 +53,16 @@ public: | |||
| 140 | GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, | 53 | GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, |
| 141 | 0); | 54 | 0); |
| 142 | 55 | ||
| 143 | glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current.vs); | 56 | glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vs); |
| 144 | glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current.gs); | 57 | glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, gs); |
| 145 | glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current.fs); | 58 | glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fs); |
| 146 | state.draw.shader_program = 0; | 59 | state.draw.shader_program = 0; |
| 147 | state.draw.program_pipeline = pipeline.handle; | 60 | state.draw.program_pipeline = pipeline.handle; |
| 148 | } | 61 | } |
| 149 | 62 | ||
| 150 | private: | 63 | private: |
| 151 | struct ShaderTuple { | ||
| 152 | GLuint vs = 0, gs = 0, fs = 0; | ||
| 153 | bool operator==(const ShaderTuple& rhs) const { | ||
| 154 | return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs); | ||
| 155 | } | ||
| 156 | struct Hash { | ||
| 157 | std::size_t operator()(const ShaderTuple& tuple) const { | ||
| 158 | std::size_t hash = 0; | ||
| 159 | boost::hash_combine(hash, tuple.vs); | ||
| 160 | boost::hash_combine(hash, tuple.gs); | ||
| 161 | boost::hash_combine(hash, tuple.fs); | ||
| 162 | return hash; | ||
| 163 | } | ||
| 164 | }; | ||
| 165 | }; | ||
| 166 | ShaderTuple current; | ||
| 167 | VertexShaders vertex_shaders; | ||
| 168 | FragmentShaders fragment_shaders; | ||
| 169 | |||
| 170 | std::unordered_map<ShaderTuple, OGLProgram, ShaderTuple::Hash> program_cache; | ||
| 171 | OGLPipeline pipeline; | 64 | OGLPipeline pipeline; |
| 65 | GLuint vs{}, fs{}, gs{}; | ||
| 172 | }; | 66 | }; |
| 173 | 67 | ||
| 174 | } // namespace OpenGL::GLShader | 68 | } // namespace OpenGL::GLShader |