summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar bunnei2018-08-27 20:35:58 -0400
committerGravatar GitHub2018-08-27 20:35:58 -0400
commitffe2336136dc683b8d97a355c2446aad2aaa5905 (patch)
tree62cc8e3ff2dcf995f9f3ae7c8928a27dada3733d /src
parentMerge pull request #1189 from FearlessTobi/fix-stick-directions (diff)
parentrenderer_opengl: Implement a new shader cache. (diff)
downloadyuzu-ffe2336136dc683b8d97a355c2446aad2aaa5905.tar.gz
yuzu-ffe2336136dc683b8d97a355c2446aad2aaa5905.tar.xz
yuzu-ffe2336136dc683b8d97a355c2446aad2aaa5905.zip
Merge pull request #1165 from bunnei/shader-cache
renderer_opengl: Implement a new shader cache.
Diffstat (limited to 'src')
-rw-r--r--src/video_core/CMakeLists.txt3
-rw-r--r--src/video_core/rasterizer_cache.h116
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp89
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h15
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp110
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h37
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp131
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h69
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h75
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp29
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h126
12 files changed, 387 insertions, 417 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index c6431e722..aa5bc3bbe 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -18,6 +18,7 @@ add_library(video_core STATIC
18 macro_interpreter.h 18 macro_interpreter.h
19 memory_manager.cpp 19 memory_manager.cpp
20 memory_manager.h 20 memory_manager.h
21 rasterizer_cache.h
21 rasterizer_interface.h 22 rasterizer_interface.h
22 renderer_base.cpp 23 renderer_base.cpp
23 renderer_base.h 24 renderer_base.h
@@ -26,6 +27,8 @@ add_library(video_core STATIC
26 renderer_opengl/gl_rasterizer_cache.cpp 27 renderer_opengl/gl_rasterizer_cache.cpp
27 renderer_opengl/gl_rasterizer_cache.h 28 renderer_opengl/gl_rasterizer_cache.h
28 renderer_opengl/gl_resource_manager.h 29 renderer_opengl/gl_resource_manager.h
30 renderer_opengl/gl_shader_cache.cpp
31 renderer_opengl/gl_shader_cache.h
29 renderer_opengl/gl_shader_decompiler.cpp 32 renderer_opengl/gl_shader_decompiler.cpp
30 renderer_opengl/gl_shader_decompiler.h 33 renderer_opengl/gl_shader_decompiler.h
31 renderer_opengl/gl_shader_gen.cpp 34 renderer_opengl/gl_shader_gen.cpp
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
new file mode 100644
index 000000000..7a0492a4e
--- /dev/null
+++ b/src/video_core/rasterizer_cache.h
@@ -0,0 +1,116 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_map>
8#include <boost/icl/interval_map.hpp>
9#include <boost/range/iterator_range.hpp>
10
11#include "common/common_types.h"
12#include "core/memory.h"
13#include "video_core/memory_manager.h"
14
15template <class T>
16class RasterizerCache : NonCopyable {
17public:
18 /// Mark the specified region as being invalidated
19 void InvalidateRegion(Tegra::GPUVAddr region_addr, size_t region_size) {
20 for (auto iter = cached_objects.cbegin(); iter != cached_objects.cend();) {
21 const auto& object{iter->second};
22
23 ++iter;
24
25 if (object->GetAddr() <= (region_addr + region_size) &&
26 region_addr <= (object->GetAddr() + object->GetSizeInBytes())) {
27 // Regions overlap, so invalidate
28 Unregister(object);
29 }
30 }
31 }
32
33protected:
34 /// Tries to get an object from the cache with the specified address
35 T TryGet(Tegra::GPUVAddr addr) const {
36 const auto& search{cached_objects.find(addr)};
37 if (search != cached_objects.end()) {
38 return search->second;
39 }
40
41 return nullptr;
42 }
43
44 /// Gets a reference to the cache
45 const std::unordered_map<Tegra::GPUVAddr, T>& GetCache() const {
46 return cached_objects;
47 }
48
49 /// Register an object into the cache
50 void Register(const T& object) {
51 const auto& search{cached_objects.find(object->GetAddr())};
52 if (search != cached_objects.end()) {
53 // Registered already
54 return;
55 }
56
57 cached_objects[object->GetAddr()] = object;
58 UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1);
59 }
60
61 /// Unregisters an object from the cache
62 void Unregister(const T& object) {
63 const auto& search{cached_objects.find(object->GetAddr())};
64 if (search == cached_objects.end()) {
65 // Unregistered already
66 return;
67 }
68
69 UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
70 cached_objects.erase(search);
71 }
72
73private:
74 using PageMap = boost::icl::interval_map<u64, int>;
75
76 template <typename Map, typename Interval>
77 constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
78 return boost::make_iterator_range(map.equal_range(interval));
79 }
80
81 /// Increase/decrease the number of object in pages touching the specified region
82 void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
83 const u64 page_start{addr >> Tegra::MemoryManager::PAGE_BITS};
84 const u64 page_end{(addr + size) >> Tegra::MemoryManager::PAGE_BITS};
85
86 // Interval maps will erase segments if count reaches 0, so if delta is negative we have to
87 // subtract after iterating
88 const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end);
89 if (delta > 0)
90 cached_pages.add({pages_interval, delta});
91
92 for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) {
93 const auto interval = pair.first & pages_interval;
94 const int count = pair.second;
95
96 const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval)
97 << Tegra::MemoryManager::PAGE_BITS;
98 const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval)
99 << Tegra::MemoryManager::PAGE_BITS;
100 const u64 interval_size = interval_end_addr - interval_start_addr;
101
102 if (delta > 0 && count == delta)
103 Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true);
104 else if (delta < 0 && count == -delta)
105 Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false);
106 else
107 ASSERT(count >= 0);
108 }
109
110 if (delta < 0)
111 cached_pages.add({pages_interval, delta});
112 }
113
114 std::unordered_map<Tegra::GPUVAddr, T> cached_objects;
115 PageMap cached_pages;
116};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 96851ccb5..9951d8178 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -178,19 +178,6 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
178 return {array_ptr, buffer_offset}; 178 return {array_ptr, buffer_offset};
179} 179}
180 180
181static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program) {
182 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
183
184 // Fetch program code from memory
185 GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH);
186 auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)];
187 const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset};
188 const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)};
189 Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64));
190
191 return program_code;
192}
193
194std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { 181std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
195 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 182 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
196 183
@@ -224,31 +211,17 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr
224 buffer_ptr += sizeof(ubo); 211 buffer_ptr += sizeof(ubo);
225 buffer_offset += sizeof(ubo); 212 buffer_offset += sizeof(ubo);
226 213
227 GLShader::ShaderSetup setup{GetShaderProgramCode(program)}; 214 const Tegra::GPUVAddr addr{gpu.regs.code_address.CodeAddress() + shader_config.offset};
228 GLShader::ShaderEntries shader_resources; 215 Shader shader{shader_cache.GetStageProgram(program)};
229 216
230 switch (program) { 217 switch (program) {
231 case Maxwell::ShaderProgram::VertexA: { 218 case Maxwell::ShaderProgram::VertexA:
232 // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
233 // Conventional HW does not support this, so we combine VertexA and VertexB into one
234 // stage here.
235 setup.SetProgramB(GetShaderProgramCode(Maxwell::ShaderProgram::VertexB));
236 GLShader::MaxwellVSConfig vs_config{setup};
237 shader_resources =
238 shader_program_manager->UseProgrammableVertexShader(vs_config, setup);
239 break;
240 }
241
242 case Maxwell::ShaderProgram::VertexB: { 219 case Maxwell::ShaderProgram::VertexB: {
243 GLShader::MaxwellVSConfig vs_config{setup}; 220 shader_program_manager->UseProgrammableVertexShader(shader->GetProgramHandle());
244 shader_resources =
245 shader_program_manager->UseProgrammableVertexShader(vs_config, setup);
246 break; 221 break;
247 } 222 }
248 case Maxwell::ShaderProgram::Fragment: { 223 case Maxwell::ShaderProgram::Fragment: {
249 GLShader::MaxwellFSConfig fs_config{setup}; 224 shader_program_manager->UseProgrammableFragmentShader(shader->GetProgramHandle());
250 shader_resources =
251 shader_program_manager->UseProgrammableFragmentShader(fs_config, setup);
252 break; 225 break;
253 } 226 }
254 default: 227 default:
@@ -257,18 +230,14 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr
257 UNREACHABLE(); 230 UNREACHABLE();
258 } 231 }
259 232
260 GLuint gl_stage_program = shader_program_manager->GetCurrentProgramStage(
261 static_cast<Maxwell::ShaderStage>(stage));
262
263 // Configure the const buffers for this shader stage. 233 // Configure the const buffers for this shader stage.
264 std::tie(buffer_ptr, buffer_offset, current_constbuffer_bindpoint) = SetupConstBuffers( 234 std::tie(buffer_ptr, buffer_offset, current_constbuffer_bindpoint) =
265 buffer_ptr, buffer_offset, static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, 235 SetupConstBuffers(buffer_ptr, buffer_offset, static_cast<Maxwell::ShaderStage>(stage),
266 current_constbuffer_bindpoint, shader_resources.const_buffer_entries); 236 shader, current_constbuffer_bindpoint);
267 237
268 // Configure the textures for this shader stage. 238 // Configure the textures for this shader stage.
269 current_texture_bindpoint = 239 current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
270 SetupTextures(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, 240 current_texture_bindpoint);
271 current_texture_bindpoint, shader_resources.texture_samplers);
272 241
273 // When VertexA is enabled, we have dual vertex shaders 242 // When VertexA is enabled, we have dual vertex shaders
274 if (program == Maxwell::ShaderProgram::VertexA) { 243 if (program == Maxwell::ShaderProgram::VertexA) {
@@ -571,23 +540,21 @@ void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
571 540
572void RasterizerOpenGL::FlushAll() { 541void RasterizerOpenGL::FlushAll() {
573 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 542 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
574 res_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS);
575} 543}
576 544
577void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { 545void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {
578 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 546 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
579 res_cache.FlushRegion(addr, size);
580} 547}
581 548
582void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { 549void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
583 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 550 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
584 res_cache.InvalidateRegion(addr, size); 551 res_cache.InvalidateRegion(addr, size);
552 shader_cache.InvalidateRegion(addr, size);
585} 553}
586 554
587void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { 555void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
588 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 556 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
589 res_cache.FlushRegion(addr, size); 557 InvalidateRegion(addr, size);
590 res_cache.InvalidateRegion(addr, size);
591} 558}
592 559
593bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { 560bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) {
@@ -672,15 +639,17 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
672 } 639 }
673} 640}
674 641
675std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers( 642std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers(u8* buffer_ptr,
676 u8* buffer_ptr, GLintptr buffer_offset, Maxwell::ShaderStage stage, GLuint program, 643 GLintptr buffer_offset,
677 u32 current_bindpoint, const std::vector<GLShader::ConstBufferEntry>& entries) { 644 Maxwell::ShaderStage stage,
645 Shader& shader,
646 u32 current_bindpoint) {
678 const auto& gpu = Core::System::GetInstance().GPU(); 647 const auto& gpu = Core::System::GetInstance().GPU();
679 const auto& maxwell3d = gpu.Maxwell3D(); 648 const auto& maxwell3d = gpu.Maxwell3D();
680
681 // Upload only the enabled buffers from the 16 constbuffers of each shader stage
682 const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)]; 649 const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)];
650 const auto& entries = shader->GetShaderEntries().const_buffer_entries;
683 651
652 // Upload only the enabled buffers from the 16 constbuffers of each shader stage
684 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { 653 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
685 const auto& used_buffer = entries[bindpoint]; 654 const auto& used_buffer = entries[bindpoint];
686 const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; 655 const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];
@@ -719,12 +688,9 @@ std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers(
719 stream_buffer.GetHandle(), const_buffer_offset, size); 688 stream_buffer.GetHandle(), const_buffer_offset, size);
720 689
721 // Now configure the bindpoint of the buffer inside the shader 690 // Now configure the bindpoint of the buffer inside the shader
722 const std::string buffer_name = used_buffer.GetName(); 691 glUniformBlockBinding(shader->GetProgramHandle(),
723 const GLuint index = 692 shader->GetProgramResourceIndex(used_buffer.GetName()),
724 glGetProgramResourceIndex(program, GL_UNIFORM_BLOCK, buffer_name.c_str()); 693 current_bindpoint + bindpoint);
725 if (index != GL_INVALID_INDEX) {
726 glUniformBlockBinding(program, index, current_bindpoint + bindpoint);
727 }
728 } 694 }
729 695
730 state.Apply(); 696 state.Apply();
@@ -732,10 +698,10 @@ std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers(
732 return {buffer_ptr, buffer_offset, current_bindpoint + static_cast<u32>(entries.size())}; 698 return {buffer_ptr, buffer_offset, current_bindpoint + static_cast<u32>(entries.size())};
733} 699}
734 700
735u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit, 701u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, u32 current_unit) {
736 const std::vector<GLShader::SamplerEntry>& entries) {
737 const auto& gpu = Core::System::GetInstance().GPU(); 702 const auto& gpu = Core::System::GetInstance().GPU();
738 const auto& maxwell3d = gpu.Maxwell3D(); 703 const auto& maxwell3d = gpu.Maxwell3D();
704 const auto& entries = shader->GetShaderEntries().texture_samplers;
739 705
740 ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units), 706 ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units),
741 "Exceeded the number of active textures."); 707 "Exceeded the number of active textures.");
@@ -745,12 +711,9 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program,
745 u32 current_bindpoint = current_unit + bindpoint; 711 u32 current_bindpoint = current_unit + bindpoint;
746 712
747 // Bind the uniform to the sampler. 713 // Bind the uniform to the sampler.
748 GLint uniform = glGetUniformLocation(program, entry.GetName().c_str());
749 if (uniform == -1) {
750 continue;
751 }
752 714
753 glProgramUniform1i(program, uniform, current_bindpoint); 715 glProgramUniform1i(shader->GetProgramHandle(), shader->GetUniformLocation(entry.GetName()),
716 current_bindpoint);
754 717
755 const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset()); 718 const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
756 719
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 531b04046..7dd329efe 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -17,6 +17,7 @@
17#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
18#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 18#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
19#include "video_core/renderer_opengl/gl_resource_manager.h" 19#include "video_core/renderer_opengl/gl_resource_manager.h"
20#include "video_core/renderer_opengl/gl_shader_cache.h"
20#include "video_core/renderer_opengl/gl_shader_gen.h" 21#include "video_core/renderer_opengl/gl_shader_gen.h"
21#include "video_core/renderer_opengl/gl_shader_manager.h" 22#include "video_core/renderer_opengl/gl_shader_manager.h"
22#include "video_core/renderer_opengl/gl_state.h" 23#include "video_core/renderer_opengl/gl_state.h"
@@ -99,26 +100,23 @@ private:
99 /* 100 /*
100 * Configures the current constbuffers to use for the draw command. 101 * Configures the current constbuffers to use for the draw command.
101 * @param stage The shader stage to configure buffers for. 102 * @param stage The shader stage to configure buffers for.
102 * @param program The OpenGL program object that contains the specified stage. 103 * @param shader The shader object that contains the specified stage.
103 * @param current_bindpoint The offset at which to start counting new buffer bindpoints. 104 * @param current_bindpoint The offset at which to start counting new buffer bindpoints.
104 * @param entries Vector describing the buffers that are actually used in the guest shader.
105 * @returns The next available bindpoint for use in the next shader stage. 105 * @returns The next available bindpoint for use in the next shader stage.
106 */ 106 */
107 std::tuple<u8*, GLintptr, u32> SetupConstBuffers( 107 std::tuple<u8*, GLintptr, u32> SetupConstBuffers(
108 u8* buffer_ptr, GLintptr buffer_offset, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 108 u8* buffer_ptr, GLintptr buffer_offset, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
109 GLuint program, u32 current_bindpoint, 109 Shader& shader, u32 current_bindpoint);
110 const std::vector<GLShader::ConstBufferEntry>& entries);
111 110
112 /* 111 /*
113 * Configures the current textures to use for the draw command. 112 * Configures the current textures to use for the draw command.
114 * @param stage The shader stage to configure textures for. 113 * @param stage The shader stage to configure textures for.
115 * @param program The OpenGL program object that contains the specified stage. 114 * @param shader The shader object that contains the specified stage.
116 * @param current_unit The offset at which to start counting unused texture units. 115 * @param current_unit The offset at which to start counting unused texture units.
117 * @param entries Vector describing the textures that are actually used in the guest shader.
118 * @returns The next available bindpoint for use in the next shader stage. 116 * @returns The next available bindpoint for use in the next shader stage.
119 */ 117 */
120 u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program, 118 u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
121 u32 current_unit, const std::vector<GLShader::SamplerEntry>& entries); 119 u32 current_unit);
122 120
123 /// Syncs the viewport to match the guest state 121 /// Syncs the viewport to match the guest state
124 void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect); 122 void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect);
@@ -157,6 +155,7 @@ private:
157 OpenGLState state; 155 OpenGLState state;
158 156
159 RasterizerCacheOpenGL res_cache; 157 RasterizerCacheOpenGL res_cache;
158 ShaderCacheOpenGL shader_cache;
160 159
161 Core::Frontend::EmuWindow& emu_window; 160 Core::Frontend::EmuWindow& emu_window;
162 161
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 83d8d3d94..65305000c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -677,12 +677,6 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
677 draw_framebuffer.Create(); 677 draw_framebuffer.Create();
678} 678}
679 679
680RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
681 while (!surface_cache.empty()) {
682 UnregisterSurface(surface_cache.begin()->second);
683 }
684}
685
686Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { 680Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) {
687 return GetSurface(SurfaceParams::CreateForTexture(config)); 681 return GetSurface(SurfaceParams::CreateForTexture(config));
688} 682}
@@ -766,27 +760,25 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
766 return {}; 760 return {};
767 761
768 // Look up surface in the cache based on address 762 // Look up surface in the cache based on address
769 const auto& search{surface_cache.find(params.addr)}; 763 Surface surface{TryGet(params.addr)};
770 Surface surface; 764 if (surface) {
771 if (search != surface_cache.end()) {
772 surface = search->second;
773 if (Settings::values.use_accurate_framebuffers) { 765 if (Settings::values.use_accurate_framebuffers) {
774 // If use_accurate_framebuffers is enabled, always load from memory 766 // If use_accurate_framebuffers is enabled, always load from memory
775 FlushSurface(surface); 767 FlushSurface(surface);
776 UnregisterSurface(surface); 768 Unregister(surface);
777 } else if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { 769 } else if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
778 // Use the cached surface as-is 770 // Use the cached surface as-is
779 return surface; 771 return surface;
780 } else if (preserve_contents) { 772 } else if (preserve_contents) {
781 // If surface parameters changed and we care about keeping the previous data, recreate 773 // If surface parameters changed and we care about keeping the previous data, recreate
782 // the surface from the old one 774 // the surface from the old one
783 UnregisterSurface(surface); 775 Unregister(surface);
784 Surface new_surface{RecreateSurface(surface, params)}; 776 Surface new_surface{RecreateSurface(surface, params)};
785 RegisterSurface(new_surface); 777 Register(new_surface);
786 return new_surface; 778 return new_surface;
787 } else { 779 } else {
788 // Delete the old surface before creating a new one to prevent collisions. 780 // Delete the old surface before creating a new one to prevent collisions.
789 UnregisterSurface(surface); 781 Unregister(surface);
790 } 782 }
791 } 783 }
792 784
@@ -797,7 +789,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
797 if (!surface) { 789 if (!surface) {
798 surface = std::make_shared<CachedSurface>(params); 790 surface = std::make_shared<CachedSurface>(params);
799 ReserveSurface(surface); 791 ReserveSurface(surface);
800 RegisterSurface(surface); 792 Register(surface);
801 } 793 }
802 794
803 // Only load surface from memory if we care about the contents 795 // Only load surface from memory if we care about the contents
@@ -894,7 +886,7 @@ Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const {
894 // framebuffer overlaps surfaces. 886 // framebuffer overlaps surfaces.
895 887
896 std::vector<Surface> surfaces; 888 std::vector<Surface> surfaces;
897 for (const auto& surface : surface_cache) { 889 for (const auto& surface : GetCache()) {
898 const auto& params = surface.second->GetSurfaceParams(); 890 const auto& params = surface.second->GetSurfaceParams();
899 const VAddr surface_cpu_addr = params.GetCpuAddr(); 891 const VAddr surface_cpu_addr = params.GetCpuAddr();
900 if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + params.size_in_bytes)) { 892 if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + params.size_in_bytes)) {
@@ -912,51 +904,6 @@ Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const {
912 return surfaces[0]; 904 return surfaces[0];
913} 905}
914 906
915void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size*/) {
916 // TODO(bunnei): This is unused in the current implementation of the rasterizer cache. We should
917 // probably implement this in the future, but for now, the `use_accurate_framebufers` setting
918 // can be used to always flush.
919}
920
921void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) {
922 for (auto iter = surface_cache.cbegin(); iter != surface_cache.cend();) {
923 const auto& surface{iter->second};
924 const auto& params{surface->GetSurfaceParams()};
925
926 ++iter;
927
928 if (params.IsOverlappingRegion(addr, size)) {
929 UnregisterSurface(surface);
930 }
931 }
932}
933
934void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
935 const auto& params{surface->GetSurfaceParams()};
936 const auto& search{surface_cache.find(params.addr)};
937
938 if (search != surface_cache.end()) {
939 // Registered already
940 return;
941 }
942
943 surface_cache[params.addr] = surface;
944 UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1);
945}
946
947void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
948 const auto& params{surface->GetSurfaceParams()};
949 const auto& search{surface_cache.find(params.addr)};
950
951 if (search == surface_cache.end()) {
952 // Unregistered already
953 return;
954 }
955
956 UpdatePagesCachedCount(params.addr, params.size_in_bytes, -1);
957 surface_cache.erase(search);
958}
959
960void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { 907void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
961 const auto& surface_reserve_key{SurfaceReserveKey::Create(surface->GetSurfaceParams())}; 908 const auto& surface_reserve_key{SurfaceReserveKey::Create(surface->GetSurfaceParams())};
962 surface_reserve[surface_reserve_key] = surface; 909 surface_reserve[surface_reserve_key] = surface;
@@ -966,49 +913,10 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params
966 const auto& surface_reserve_key{SurfaceReserveKey::Create(params)}; 913 const auto& surface_reserve_key{SurfaceReserveKey::Create(params)};
967 auto search{surface_reserve.find(surface_reserve_key)}; 914 auto search{surface_reserve.find(surface_reserve_key)};
968 if (search != surface_reserve.end()) { 915 if (search != surface_reserve.end()) {
969 RegisterSurface(search->second); 916 Register(search->second);
970 return search->second; 917 return search->second;
971 } 918 }
972 return {}; 919 return {};
973} 920}
974 921
975template <typename Map, typename Interval>
976constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
977 return boost::make_iterator_range(map.equal_range(interval));
978}
979
980void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
981 const u64 num_pages = ((addr + size - 1) >> Tegra::MemoryManager::PAGE_BITS) -
982 (addr >> Tegra::MemoryManager::PAGE_BITS) + 1;
983 const u64 page_start = addr >> Tegra::MemoryManager::PAGE_BITS;
984 const u64 page_end = page_start + num_pages;
985
986 // Interval maps will erase segments if count reaches 0, so if delta is negative we have to
987 // subtract after iterating
988 const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end);
989 if (delta > 0)
990 cached_pages.add({pages_interval, delta});
991
992 for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) {
993 const auto interval = pair.first & pages_interval;
994 const int count = pair.second;
995
996 const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval)
997 << Tegra::MemoryManager::PAGE_BITS;
998 const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval)
999 << Tegra::MemoryManager::PAGE_BITS;
1000 const u64 interval_size = interval_end_addr - interval_start_addr;
1001
1002 if (delta > 0 && count == delta)
1003 Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true);
1004 else if (delta < 0 && count == -delta)
1005 Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false);
1006 else
1007 ASSERT(count >= 0);
1008 }
1009
1010 if (delta < 0)
1011 cached_pages.add({pages_interval, delta});
1012}
1013
1014} // namespace OpenGL 922} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index c8c615df2..8a6ca2a4b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -8,12 +8,12 @@
8#include <map> 8#include <map>
9#include <memory> 9#include <memory>
10#include <vector> 10#include <vector>
11#include <boost/icl/interval_map.hpp>
12 11
13#include "common/common_types.h" 12#include "common/common_types.h"
14#include "common/hash.h" 13#include "common/hash.h"
15#include "common/math_util.h" 14#include "common/math_util.h"
16#include "video_core/engines/maxwell_3d.h" 15#include "video_core/engines/maxwell_3d.h"
16#include "video_core/rasterizer_cache.h"
17#include "video_core/renderer_opengl/gl_resource_manager.h" 17#include "video_core/renderer_opengl/gl_resource_manager.h"
18#include "video_core/textures/texture.h" 18#include "video_core/textures/texture.h"
19 19
@@ -22,7 +22,6 @@ namespace OpenGL {
22class CachedSurface; 22class CachedSurface;
23using Surface = std::shared_ptr<CachedSurface>; 23using Surface = std::shared_ptr<CachedSurface>;
24using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; 24using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
25using PageMap = boost::icl::interval_map<u64, int>;
26 25
27struct SurfaceParams { 26struct SurfaceParams {
28 enum class PixelFormat { 27 enum class PixelFormat {
@@ -632,11 +631,6 @@ struct SurfaceParams {
632 /// Returns the CPU virtual address for this surface 631 /// Returns the CPU virtual address for this surface
633 VAddr GetCpuAddr() const; 632 VAddr GetCpuAddr() const;
634 633
635 /// Returns true if the specified region overlaps with this surface's region in Switch memory
636 bool IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const {
637 return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes);
638 }
639
640 /// Creates SurfaceParams from a texture configuration 634 /// Creates SurfaceParams from a texture configuration
641 static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); 635 static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
642 636
@@ -708,6 +702,14 @@ class CachedSurface final {
708public: 702public:
709 CachedSurface(const SurfaceParams& params); 703 CachedSurface(const SurfaceParams& params);
710 704
705 Tegra::GPUVAddr GetAddr() const {
706 return params.addr;
707 }
708
709 size_t GetSizeInBytes() const {
710 return params.size_in_bytes;
711 }
712
711 const OGLTexture& Texture() const { 713 const OGLTexture& Texture() const {
712 return texture; 714 return texture;
713 } 715 }
@@ -737,10 +739,9 @@ private:
737 SurfaceParams params; 739 SurfaceParams params;
738}; 740};
739 741
740class RasterizerCacheOpenGL final : NonCopyable { 742class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
741public: 743public:
742 RasterizerCacheOpenGL(); 744 RasterizerCacheOpenGL();
743 ~RasterizerCacheOpenGL();
744 745
745 /// Get a surface based on the texture configuration 746 /// Get a surface based on the texture configuration
746 Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); 747 Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
@@ -755,12 +756,6 @@ public:
755 /// Tries to find a framebuffer GPU address based on the provided CPU address 756 /// Tries to find a framebuffer GPU address based on the provided CPU address
756 Surface TryFindFramebufferSurface(VAddr cpu_addr) const; 757 Surface TryFindFramebufferSurface(VAddr cpu_addr) const;
757 758
758 /// Write any cached resources overlapping the region back to memory (if dirty)
759 void FlushRegion(Tegra::GPUVAddr addr, size_t size);
760
761 /// Mark the specified region as being invalidated
762 void InvalidateRegion(Tegra::GPUVAddr addr, size_t size);
763
764private: 759private:
765 void LoadSurface(const Surface& surface); 760 void LoadSurface(const Surface& surface);
766 Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); 761 Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true);
@@ -768,24 +763,12 @@ private:
768 /// Recreates a surface with new parameters 763 /// Recreates a surface with new parameters
769 Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params); 764 Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params);
770 765
771 /// Register surface into the cache
772 void RegisterSurface(const Surface& surface);
773
774 /// Remove surface from the cache
775 void UnregisterSurface(const Surface& surface);
776
777 /// Reserves a unique surface that can be reused later 766 /// Reserves a unique surface that can be reused later
778 void ReserveSurface(const Surface& surface); 767 void ReserveSurface(const Surface& surface);
779 768
780 /// Tries to get a reserved surface for the specified parameters 769 /// Tries to get a reserved surface for the specified parameters
781 Surface TryGetReservedSurface(const SurfaceParams& params); 770 Surface TryGetReservedSurface(const SurfaceParams& params);
782 771
783 /// Increase/decrease the number of surface in pages touching the specified region
784 void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta);
785
786 std::unordered_map<Tegra::GPUVAddr, Surface> surface_cache;
787 PageMap cached_pages;
788
789 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have 772 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
790 /// previously been used. This is to prevent surfaces from being constantly created and 773 /// previously been used. This is to prevent surfaces from being constantly created and
791 /// destroyed when used with different surface parameters. 774 /// destroyed when used with different surface parameters.
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
new file mode 100644
index 000000000..3c3d1d35e
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -0,0 +1,131 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "core/core.h"
7#include "core/memory.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_opengl/gl_shader_cache.h"
10#include "video_core/renderer_opengl/gl_shader_manager.h"
11
12namespace OpenGL {
13
14/// Gets the address for the specified shader stage program
15static Tegra::GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
16 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
17
18 GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH);
19 auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)];
20 return gpu.regs.code_address.CodeAddress() + shader_config.offset;
21}
22
23/// Gets the shader program code from memory for the specified address
24static GLShader::ProgramCode GetShaderCode(Tegra::GPUVAddr addr) {
25 auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
26
27 GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH);
28 const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(addr)};
29 Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64));
30
31 return program_code;
32}
33
34/// Helper function to set shader uniform block bindings for a single shader stage
35static void SetShaderUniformBlockBinding(GLuint shader, const char* name,
36 Maxwell::ShaderStage binding, size_t expected_size) {
37 const GLuint ub_index = glGetUniformBlockIndex(shader, name);
38 if (ub_index == GL_INVALID_INDEX) {
39 return;
40 }
41
42 GLint ub_size = 0;
43 glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
44 ASSERT_MSG(static_cast<size_t>(ub_size) == expected_size,
45 "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size);
46 glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
47}
48
49/// Sets shader uniform block bindings for an entire shader program
50static void SetShaderUniformBlockBindings(GLuint shader) {
51 SetShaderUniformBlockBinding(shader, "vs_config", Maxwell::ShaderStage::Vertex,
52 sizeof(GLShader::MaxwellUniformData));
53 SetShaderUniformBlockBinding(shader, "gs_config", Maxwell::ShaderStage::Geometry,
54 sizeof(GLShader::MaxwellUniformData));
55 SetShaderUniformBlockBinding(shader, "fs_config", Maxwell::ShaderStage::Fragment,
56 sizeof(GLShader::MaxwellUniformData));
57}
58
59CachedShader::CachedShader(Tegra::GPUVAddr addr, Maxwell::ShaderProgram program_type)
60 : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} {
61
62 GLShader::ProgramResult program_result;
63 GLenum gl_type{};
64
65 switch (program_type) {
66 case Maxwell::ShaderProgram::VertexA:
67 // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
68 // Conventional HW does not support this, so we combine VertexA and VertexB into one
69 // stage here.
70 setup.SetProgramB(GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
71 case Maxwell::ShaderProgram::VertexB:
72 program_result = GLShader::GenerateVertexShader(setup);
73 gl_type = GL_VERTEX_SHADER;
74 break;
75 case Maxwell::ShaderProgram::Fragment:
76 program_result = GLShader::GenerateFragmentShader(setup);
77 gl_type = GL_FRAGMENT_SHADER;
78 break;
79 default:
80 LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
81 UNREACHABLE();
82 return;
83 }
84
85 entries = program_result.second;
86
87 OGLShader shader;
88 shader.Create(program_result.first.c_str(), gl_type);
89 program.Create(true, shader.handle);
90 SetShaderUniformBlockBindings(program.handle);
91}
92
93GLuint CachedShader::GetProgramResourceIndex(const std::string& name) {
94 auto search{resource_cache.find(name)};
95 if (search == resource_cache.end()) {
96 const GLuint index{
97 glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, name.c_str())};
98 resource_cache[name] = index;
99 return index;
100 }
101
102 return search->second;
103}
104
105GLint CachedShader::GetUniformLocation(const std::string& name) {
106 auto search{uniform_cache.find(name)};
107 if (search == uniform_cache.end()) {
108 const GLint index{glGetUniformLocation(program.handle, name.c_str())};
109 uniform_cache[name] = index;
110 return index;
111 }
112
113 return search->second;
114}
115
116Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
117 const Tegra::GPUVAddr program_addr{GetShaderAddress(program)};
118
119 // Look up shader in the cache based on address
120 Shader shader{TryGet(program_addr)};
121
122 if (!shader) {
123 // No shader found - create a new one
124 shader = std::make_shared<CachedShader>(program_addr, program);
125 Register(shader);
126 }
127
128 return shader;
129}
130
131} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
new file mode 100644
index 000000000..44156dcab
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -0,0 +1,69 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <unordered_map>
9
10#include "common/common_types.h"
11#include "video_core/memory_manager.h"
12#include "video_core/rasterizer_cache.h"
13#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/renderer_opengl/gl_shader_gen.h"
15
16namespace OpenGL {
17
18class CachedShader;
19using Shader = std::shared_ptr<CachedShader>;
20using Maxwell = Tegra::Engines::Maxwell3D::Regs;
21
22class CachedShader final {
23public:
24 CachedShader(Tegra::GPUVAddr addr, Maxwell::ShaderProgram program_type);
25
26 /// Gets the address of the shader in guest memory, required for cache management
27 Tegra::GPUVAddr GetAddr() const {
28 return addr;
29 }
30
31 /// Gets the size of the shader in guest memory, required for cache management
32 size_t GetSizeInBytes() const {
33 return sizeof(GLShader::ProgramCode);
34 }
35
36 /// Gets the shader entries for the shader
37 const GLShader::ShaderEntries& GetShaderEntries() const {
38 return entries;
39 }
40
41 /// Gets the GL program handle for the shader
42 GLuint GetProgramHandle() const {
43 return program.handle;
44 }
45
46 /// Gets the GL program resource location for the specified resource, caching as needed
47 GLuint GetProgramResourceIndex(const std::string& name);
48
49 /// Gets the GL uniform location for the specified resource, caching as needed
50 GLint GetUniformLocation(const std::string& name);
51
52private:
53 Tegra::GPUVAddr addr;
54 Maxwell::ShaderProgram program_type;
55 GLShader::ShaderSetup setup;
56 GLShader::ShaderEntries entries;
57 OGLProgram program;
58
59 std::unordered_map<std::string, GLuint> resource_cache;
60 std::unordered_map<std::string, GLint> uniform_cache;
61};
62
63class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
64public:
65 /// Gets the current specified shader stage program
66 Shader GetStageProgram(Maxwell::ShaderProgram program);
67};
68
69} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 0677317bc..6ca05945e 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -13,7 +13,7 @@ using Tegra::Engines::Maxwell3D;
13 13
14static constexpr u32 PROGRAM_OFFSET{10}; 14static constexpr u32 PROGRAM_OFFSET{10};
15 15
16ProgramResult GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config) { 16ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
17 std::string out = "#version 430 core\n"; 17 std::string out = "#version 430 core\n";
18 out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; 18 out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
19 out += Decompiler::GetCommonDeclarations(); 19 out += Decompiler::GetCommonDeclarations();
@@ -75,7 +75,7 @@ void main() {
75 return {out, program.second}; 75 return {out, program.second};
76} 76}
77 77
78ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config) { 78ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
79 std::string out = "#version 430 core\n"; 79 std::string out = "#version 430 core\n";
80 out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; 80 out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
81 out += Decompiler::GetCommonDeclarations(); 81 out += Decompiler::GetCommonDeclarations();
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 4e5a6f130..c788099d4 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -6,12 +6,9 @@
6 6
7#include <array> 7#include <array>
8#include <string> 8#include <string>
9#include <type_traits>
10#include <utility>
11#include <vector> 9#include <vector>
12#include <boost/functional/hash.hpp> 10
13#include "common/common_types.h" 11#include "common/common_types.h"
14#include "common/hash.h"
15 12
16namespace OpenGL::GLShader { 13namespace OpenGL::GLShader {
17 14
@@ -124,18 +121,8 @@ struct ShaderSetup {
124 ProgramCode code_b; // Used for dual vertex shaders 121 ProgramCode code_b; // Used for dual vertex shaders
125 } program; 122 } program;
126 123
127 bool program_code_hash_dirty = true;
128
129 u64 GetProgramCodeHash() {
130 if (program_code_hash_dirty) {
131 program_code_hash = GetNewHash();
132 program_code_hash_dirty = false;
133 }
134 return program_code_hash;
135 }
136
137 /// Used in scenarios where we have a dual vertex shaders 124 /// Used in scenarios where we have a dual vertex shaders
138 void SetProgramB(ProgramCode program_b) { 125 void SetProgramB(ProgramCode&& program_b) {
139 program.code_b = std::move(program_b); 126 program.code_b = std::move(program_b);
140 has_program_b = true; 127 has_program_b = true;
141 } 128 }
@@ -145,73 +132,19 @@ struct ShaderSetup {
145 } 132 }
146 133
147private: 134private:
148 u64 GetNewHash() const {
149 size_t hash = 0;
150
151 const u64 hash_a = Common::ComputeHash64(program.code.data(), program.code.size());
152 boost::hash_combine(hash, hash_a);
153
154 if (has_program_b) {
155 // Compute hash over dual shader programs
156 const u64 hash_b = Common::ComputeHash64(program.code_b.data(), program.code_b.size());
157 boost::hash_combine(hash, hash_b);
158 }
159
160 return hash;
161 }
162
163 u64 program_code_hash{};
164 bool has_program_b{}; 135 bool has_program_b{};
165}; 136};
166 137
167struct MaxwellShaderConfigCommon {
168 void Init(ShaderSetup& setup) {
169 program_hash = setup.GetProgramCodeHash();
170 }
171
172 u64 program_hash;
173};
174
175struct MaxwellVSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> {
176 explicit MaxwellVSConfig(ShaderSetup& setup) {
177 state.Init(setup);
178 }
179};
180
181struct MaxwellFSConfig : Common::HashableStruct<MaxwellShaderConfigCommon> {
182 explicit MaxwellFSConfig(ShaderSetup& setup) {
183 state.Init(setup);
184 }
185};
186
187/** 138/**
188 * Generates the GLSL vertex shader program source code for the given VS program 139 * Generates the GLSL vertex shader program source code for the given VS program
189 * @returns String of the shader source code 140 * @returns String of the shader source code
190 */ 141 */
191ProgramResult GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConfig& config); 142ProgramResult GenerateVertexShader(const ShaderSetup& setup);
192 143
193/** 144/**
194 * Generates the GLSL fragment shader program source code for the given FS program 145 * Generates the GLSL fragment shader program source code for the given FS program
195 * @returns String of the shader source code 146 * @returns String of the shader source code
196 */ 147 */
197ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSConfig& config); 148ProgramResult GenerateFragmentShader(const ShaderSetup& setup);
198 149
199} // namespace OpenGL::GLShader 150} // namespace OpenGL::GLShader
200
201namespace std {
202
203template <>
204struct hash<OpenGL::GLShader::MaxwellVSConfig> {
205 size_t operator()(const OpenGL::GLShader::MaxwellVSConfig& k) const {
206 return k.Hash();
207 }
208};
209
210template <>
211struct hash<OpenGL::GLShader::MaxwellFSConfig> {
212 size_t operator()(const OpenGL::GLShader::MaxwellFSConfig& k) const {
213 return k.Hash();
214 }
215};
216
217} // namespace std
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 8960afef5..022d32a86 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -3,39 +3,10 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h" 5#include "core/core.h"
6#include "core/hle/kernel/process.h"
7#include "video_core/engines/maxwell_3d.h"
8#include "video_core/renderer_opengl/gl_shader_manager.h" 6#include "video_core/renderer_opengl/gl_shader_manager.h"
9 7
10namespace OpenGL::GLShader { 8namespace OpenGL::GLShader {
11 9
12namespace Impl {
13static void SetShaderUniformBlockBinding(GLuint shader, const char* name,
14 Maxwell3D::Regs::ShaderStage binding,
15 size_t expected_size) {
16 const GLuint ub_index = glGetUniformBlockIndex(shader, name);
17 if (ub_index == GL_INVALID_INDEX) {
18 return;
19 }
20
21 GLint ub_size = 0;
22 glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size);
23 ASSERT_MSG(static_cast<size_t>(ub_size) == expected_size,
24 "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size);
25 glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding));
26}
27
28void SetShaderUniformBlockBindings(GLuint shader) {
29 SetShaderUniformBlockBinding(shader, "vs_config", Maxwell3D::Regs::ShaderStage::Vertex,
30 sizeof(MaxwellUniformData));
31 SetShaderUniformBlockBinding(shader, "gs_config", Maxwell3D::Regs::ShaderStage::Geometry,
32 sizeof(MaxwellUniformData));
33 SetShaderUniformBlockBinding(shader, "fs_config", Maxwell3D::Regs::ShaderStage::Fragment,
34 sizeof(MaxwellUniformData));
35}
36
37} // namespace Impl
38
39void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { 10void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
40 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 11 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
41 const auto& regs = gpu.regs; 12 const auto& regs = gpu.regs;
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 0e7085776..533e42caa 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -4,12 +4,9 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <tuple>
8#include <unordered_map>
9#include <boost/functional/hash.hpp>
10#include <glad/glad.h> 7#include <glad/glad.h>
8
11#include "video_core/renderer_opengl/gl_resource_manager.h" 9#include "video_core/renderer_opengl/gl_resource_manager.h"
12#include "video_core/renderer_opengl/gl_shader_gen.h"
13#include "video_core/renderer_opengl/maxwell_to_gl.h" 10#include "video_core/renderer_opengl/maxwell_to_gl.h"
14 11
15namespace OpenGL::GLShader { 12namespace OpenGL::GLShader {
@@ -19,10 +16,6 @@ static constexpr size_t NumTextureSamplers = 32;
19 16
20using Tegra::Engines::Maxwell3D; 17using Tegra::Engines::Maxwell3D;
21 18
22namespace Impl {
23void SetShaderUniformBlockBindings(GLuint shader);
24} // namespace Impl
25
26/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned 19/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
27// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at 20// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
28// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. 21// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
@@ -36,102 +29,22 @@ static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure si
36static_assert(sizeof(MaxwellUniformData) < 16384, 29static_assert(sizeof(MaxwellUniformData) < 16384,
37 "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); 30 "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
38 31
39class OGLShaderStage {
40public:
41 OGLShaderStage() = default;
42
43 void Create(const ProgramResult& program_result, GLenum type) {
44 OGLShader shader;
45 shader.Create(program_result.first.c_str(), type);
46 program.Create(true, shader.handle);
47 Impl::SetShaderUniformBlockBindings(program.handle);
48 entries = program_result.second;
49 }
50 GLuint GetHandle() const {
51 return program.handle;
52 }
53
54 ShaderEntries GetEntries() const {
55 return entries;
56 }
57
58private:
59 OGLProgram program;
60 ShaderEntries entries;
61};
62
63// TODO(wwylele): beautify this doc
64// This is a shader cache designed for translating PICA shader to GLSL shader.
65// The double cache is needed because diffent KeyConfigType, which includes a hash of the code
66// region (including its leftover unused code) can generate the same GLSL code.
67template <typename KeyConfigType,
68 ProgramResult (*CodeGenerator)(const ShaderSetup&, const KeyConfigType&),
69 GLenum ShaderType>
70class ShaderCache {
71public:
72 ShaderCache() = default;
73
74 using Result = std::pair<GLuint, ShaderEntries>;
75
76 Result Get(const KeyConfigType& key, const ShaderSetup& setup) {
77 auto map_it = shader_map.find(key);
78 if (map_it == shader_map.end()) {
79 ProgramResult program = CodeGenerator(setup, key);
80
81 auto [iter, new_shader] = shader_cache.emplace(program.first, OGLShaderStage{});
82 OGLShaderStage& cached_shader = iter->second;
83 if (new_shader) {
84 cached_shader.Create(program, ShaderType);
85 }
86 shader_map[key] = &cached_shader;
87 return {cached_shader.GetHandle(), program.second};
88 } else {
89 return {map_it->second->GetHandle(), map_it->second->GetEntries()};
90 }
91 }
92
93private:
94 std::unordered_map<KeyConfigType, OGLShaderStage*> shader_map;
95 std::unordered_map<std::string, OGLShaderStage> shader_cache;
96};
97
98using VertexShaders = ShaderCache<MaxwellVSConfig, &GenerateVertexShader, GL_VERTEX_SHADER>;
99
100using FragmentShaders = ShaderCache<MaxwellFSConfig, &GenerateFragmentShader, GL_FRAGMENT_SHADER>;
101
102class ProgramManager { 32class ProgramManager {
103public: 33public:
104 ProgramManager() { 34 ProgramManager() {
105 pipeline.Create(); 35 pipeline.Create();
106 } 36 }
107 37
108 ShaderEntries UseProgrammableVertexShader(const MaxwellVSConfig& config, 38 void UseProgrammableVertexShader(GLuint program) {
109 const ShaderSetup& setup) { 39 vs = program;
110 ShaderEntries result;
111 std::tie(current.vs, result) = vertex_shaders.Get(config, setup);
112 return result;
113 }
114
115 ShaderEntries UseProgrammableFragmentShader(const MaxwellFSConfig& config,
116 const ShaderSetup& setup) {
117 ShaderEntries result;
118 std::tie(current.fs, result) = fragment_shaders.Get(config, setup);
119 return result;
120 } 40 }
121 41
122 GLuint GetCurrentProgramStage(Maxwell3D::Regs::ShaderStage stage) const { 42 void UseProgrammableFragmentShader(GLuint program) {
123 switch (stage) { 43 fs = program;
124 case Maxwell3D::Regs::ShaderStage::Vertex:
125 return current.vs;
126 case Maxwell3D::Regs::ShaderStage::Fragment:
127 return current.fs;
128 }
129
130 UNREACHABLE();
131 } 44 }
132 45
133 void UseTrivialGeometryShader() { 46 void UseTrivialGeometryShader() {
134 current.gs = 0; 47 gs = 0;
135 } 48 }
136 49
137 void ApplyTo(OpenGLState& state) { 50 void ApplyTo(OpenGLState& state) {
@@ -140,35 +53,16 @@ public:
140 GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, 53 GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT,
141 0); 54 0);
142 55
143 glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current.vs); 56 glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vs);
144 glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current.gs); 57 glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, gs);
145 glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current.fs); 58 glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fs);
146 state.draw.shader_program = 0; 59 state.draw.shader_program = 0;
147 state.draw.program_pipeline = pipeline.handle; 60 state.draw.program_pipeline = pipeline.handle;
148 } 61 }
149 62
150private: 63private:
151 struct ShaderTuple {
152 GLuint vs = 0, gs = 0, fs = 0;
153 bool operator==(const ShaderTuple& rhs) const {
154 return std::tie(vs, gs, fs) == std::tie(rhs.vs, rhs.gs, rhs.fs);
155 }
156 struct Hash {
157 std::size_t operator()(const ShaderTuple& tuple) const {
158 std::size_t hash = 0;
159 boost::hash_combine(hash, tuple.vs);
160 boost::hash_combine(hash, tuple.gs);
161 boost::hash_combine(hash, tuple.fs);
162 return hash;
163 }
164 };
165 };
166 ShaderTuple current;
167 VertexShaders vertex_shaders;
168 FragmentShaders fragment_shaders;
169
170 std::unordered_map<ShaderTuple, OGLProgram, ShaderTuple::Hash> program_cache;
171 OGLPipeline pipeline; 64 OGLPipeline pipeline;
65 GLuint vs{}, fs{}, gs{};
172}; 66};
173 67
174} // namespace OpenGL::GLShader 68} // namespace OpenGL::GLShader