summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2021-05-23 04:28:34 -0300
committerGravatar ameerj2021-07-22 21:51:30 -0400
commitd621e96d0de212cc16897eadf71e8a1b2e1eb5dc (patch)
tree8695f2f4dddf2564b63e4574d6616ccb0e79568c
parentspirv: Be aware of NAN unaware drivers (diff)
downloadyuzu-d621e96d0de212cc16897eadf71e8a1b2e1eb5dc.tar.gz
yuzu-d621e96d0de212cc16897eadf71e8a1b2e1eb5dc.tar.xz
yuzu-d621e96d0de212cc16897eadf71e8a1b2e1eb5dc.zip
shader: Initial OpenGL implementation
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.cpp4
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.h1
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp7
-rw-r--r--src/video_core/CMakeLists.txt4
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h53
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp37
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h40
-rw-r--r--src/video_core/renderer_opengl/gl_compute_program.cpp178
-rw-r--r--src/video_core/renderer_opengl/gl_compute_program.h83
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp89
-rw-r--r--src/video_core/renderer_opengl/gl_device.h16
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_program.cpp296
-rw-r--r--src/video_core/renderer_opengl/gl_graphics_program.h105
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp23
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp275
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h98
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp146
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h73
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp257
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h29
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h108
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp17
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h5
-rw-r--r--src/video_core/renderer_opengl/util_shaders.cpp13
-rw-r--r--src/video_core/renderer_vulkan/pipeline_helper.h17
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h2
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp22
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp22
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp23
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp11
-rw-r--r--src/video_core/shader_cache.cpp17
-rw-r--r--src/video_core/shader_cache.h23
-rw-r--r--src/video_core/shader_environment.cpp4
-rw-r--r--src/video_core/shader_environment.h16
-rw-r--r--src/video_core/texture_cache/formatter.cpp4
-rw-r--r--src/video_core/texture_cache/formatter.h3
-rw-r--r--src/video_core/textures/texture.h9
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp2
38 files changed, 1427 insertions, 705 deletions
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index b3c9fe72a..5913fdeff 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -355,6 +355,10 @@ U32 IREmitter::WorkgroupIdZ() {
355 return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)}; 355 return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)};
356} 356}
357 357
358Value IREmitter::LocalInvocationId() {
359 return Inst(Opcode::LocalInvocationId);
360}
361
358U32 IREmitter::LocalInvocationIdX() { 362U32 IREmitter::LocalInvocationIdX() {
359 return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)}; 363 return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)};
360} 364}
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 4441c495d..a12919283 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -95,6 +95,7 @@ public:
95 [[nodiscard]] U32 WorkgroupIdY(); 95 [[nodiscard]] U32 WorkgroupIdY();
96 [[nodiscard]] U32 WorkgroupIdZ(); 96 [[nodiscard]] U32 WorkgroupIdZ();
97 97
98 [[nodiscard]] Value LocalInvocationId();
98 [[nodiscard]] U32 LocalInvocationIdX(); 99 [[nodiscard]] U32 LocalInvocationIdX();
99 [[nodiscard]] U32 LocalInvocationIdY(); 100 [[nodiscard]] U32 LocalInvocationIdY();
100 [[nodiscard]] U32 LocalInvocationIdZ(); 101 [[nodiscard]] U32 LocalInvocationIdZ();
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
index b0baff74b..01fb6f5e5 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -120,6 +120,13 @@ enum class SpecialRegister : u64 {
120 case SpecialRegister::SR_INVOCATION_INFO: 120 case SpecialRegister::SR_INVOCATION_INFO:
121 // LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed"); 121 // LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed");
122 return ir.Imm32(0x00ff'0000); 122 return ir.Imm32(0x00ff'0000);
123 case SpecialRegister::SR_TID: {
124 const IR::Value tid{ir.LocalInvocationId()};
125 return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)},
126 IR::U32{ir.CompositeExtract(tid, 1)},
127 ir.Imm32(16), ir.Imm32(8)),
128 IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6));
129 }
123 case SpecialRegister::SR_TID_X: 130 case SpecialRegister::SR_TID_X:
124 return ir.LocalInvocationIdX(); 131 return ir.LocalInvocationIdX();
125 case SpecialRegister::SR_TID_Y: 132 case SpecialRegister::SR_TID_Y:
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6e0e4b8f5..b008c37c0 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -67,10 +67,14 @@ add_library(video_core STATIC
67 renderer_base.h 67 renderer_base.h
68 renderer_opengl/gl_buffer_cache.cpp 68 renderer_opengl/gl_buffer_cache.cpp
69 renderer_opengl/gl_buffer_cache.h 69 renderer_opengl/gl_buffer_cache.h
70 renderer_opengl/gl_compute_program.cpp
71 renderer_opengl/gl_compute_program.h
70 renderer_opengl/gl_device.cpp 72 renderer_opengl/gl_device.cpp
71 renderer_opengl/gl_device.h 73 renderer_opengl/gl_device.h
72 renderer_opengl/gl_fence_manager.cpp 74 renderer_opengl/gl_fence_manager.cpp
73 renderer_opengl/gl_fence_manager.h 75 renderer_opengl/gl_fence_manager.h
76 renderer_opengl/gl_graphics_program.cpp
77 renderer_opengl/gl_graphics_program.h
74 renderer_opengl/gl_rasterizer.cpp 78 renderer_opengl/gl_rasterizer.cpp
75 renderer_opengl/gl_rasterizer.h 79 renderer_opengl/gl_rasterizer.h
76 renderer_opengl/gl_resource_manager.cpp 80 renderer_opengl/gl_resource_manager.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 29746f61d..6c92e4c30 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -70,8 +70,8 @@ class BufferCache {
70 P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; 70 P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT;
71 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; 71 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
72 static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; 72 static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
73 static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = P::NEEDS_BIND_TEXTURE_BUFFER_INDEX;
74 static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; 73 static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
74 static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
75 75
76 static constexpr BufferId NULL_BUFFER_ID{0}; 76 static constexpr BufferId NULL_BUFFER_ID{0};
77 77
@@ -154,7 +154,7 @@ public:
154 void UnbindGraphicsTextureBuffers(size_t stage); 154 void UnbindGraphicsTextureBuffers(size_t stage);
155 155
156 void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, 156 void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size,
157 PixelFormat format, bool is_written); 157 PixelFormat format, bool is_written, bool is_image);
158 158
159 void UnbindComputeStorageBuffers(); 159 void UnbindComputeStorageBuffers();
160 160
@@ -164,7 +164,7 @@ public:
164 void UnbindComputeTextureBuffers(); 164 void UnbindComputeTextureBuffers();
165 165
166 void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, 166 void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
167 bool is_written); 167 bool is_written, bool is_image);
168 168
169 void FlushCachedWrites(); 169 void FlushCachedWrites();
170 170
@@ -197,6 +197,7 @@ public:
197 [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); 197 [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
198 198
199 std::mutex mutex; 199 std::mutex mutex;
200 Runtime& runtime;
200 201
201private: 202private:
202 template <typename Func> 203 template <typename Func>
@@ -366,7 +367,6 @@ private:
366 Tegra::Engines::KeplerCompute& kepler_compute; 367 Tegra::Engines::KeplerCompute& kepler_compute;
367 Tegra::MemoryManager& gpu_memory; 368 Tegra::MemoryManager& gpu_memory;
368 Core::Memory::Memory& cpu_memory; 369 Core::Memory::Memory& cpu_memory;
369 Runtime& runtime;
370 370
371 SlotVector<Buffer> slot_buffers; 371 SlotVector<Buffer> slot_buffers;
372 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; 372 DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
@@ -394,8 +394,10 @@ private:
394 394
395 std::array<u32, NUM_STAGES> enabled_texture_buffers{}; 395 std::array<u32, NUM_STAGES> enabled_texture_buffers{};
396 std::array<u32, NUM_STAGES> written_texture_buffers{}; 396 std::array<u32, NUM_STAGES> written_texture_buffers{};
397 std::array<u32, NUM_STAGES> image_texture_buffers{};
397 u32 enabled_compute_texture_buffers = 0; 398 u32 enabled_compute_texture_buffers = 0;
398 u32 written_compute_texture_buffers = 0; 399 u32 written_compute_texture_buffers = 0;
400 u32 image_compute_texture_buffers = 0;
399 401
400 std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; 402 std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
401 403
@@ -431,8 +433,8 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
431 Tegra::Engines::KeplerCompute& kepler_compute_, 433 Tegra::Engines::KeplerCompute& kepler_compute_,
432 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, 434 Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
433 Runtime& runtime_) 435 Runtime& runtime_)
434 : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, 436 : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
435 gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { 437 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
436 // Ensure the first slot is used for the null buffer 438 // Ensure the first slot is used for the null buffer
437 void(slot_buffers.insert(runtime, NullBufferParams{})); 439 void(slot_buffers.insert(runtime, NullBufferParams{}));
438 deletion_iterator = slot_buffers.end(); 440 deletion_iterator = slot_buffers.end();
@@ -703,13 +705,18 @@ template <class P>
703void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) { 705void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) {
704 enabled_texture_buffers[stage] = 0; 706 enabled_texture_buffers[stage] = 0;
705 written_texture_buffers[stage] = 0; 707 written_texture_buffers[stage] = 0;
708 image_texture_buffers[stage] = 0;
706} 709}
707 710
708template <class P> 711template <class P>
709void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, 712void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr,
710 u32 size, PixelFormat format, bool is_written) { 713 u32 size, PixelFormat format, bool is_written,
714 bool is_image) {
711 enabled_texture_buffers[stage] |= 1U << tbo_index; 715 enabled_texture_buffers[stage] |= 1U << tbo_index;
712 written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; 716 written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index;
717 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
718 image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index;
719 }
713 texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); 720 texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
714} 721}
715 722
@@ -717,6 +724,7 @@ template <class P>
717void BufferCache<P>::UnbindComputeStorageBuffers() { 724void BufferCache<P>::UnbindComputeStorageBuffers() {
718 enabled_compute_storage_buffers = 0; 725 enabled_compute_storage_buffers = 0;
719 written_compute_storage_buffers = 0; 726 written_compute_storage_buffers = 0;
727 image_compute_texture_buffers = 0;
720} 728}
721 729
722template <class P> 730template <class P>
@@ -737,13 +745,17 @@ template <class P>
737void BufferCache<P>::UnbindComputeTextureBuffers() { 745void BufferCache<P>::UnbindComputeTextureBuffers() {
738 enabled_compute_texture_buffers = 0; 746 enabled_compute_texture_buffers = 0;
739 written_compute_texture_buffers = 0; 747 written_compute_texture_buffers = 0;
748 image_compute_texture_buffers = 0;
740} 749}
741 750
742template <class P> 751template <class P>
743void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, 752void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size,
744 PixelFormat format, bool is_written) { 753 PixelFormat format, bool is_written, bool is_image) {
745 enabled_compute_texture_buffers |= 1U << tbo_index; 754 enabled_compute_texture_buffers |= 1U << tbo_index;
746 written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; 755 written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index;
756 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
757 image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index;
758 }
747 compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); 759 compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
748} 760}
749 761
@@ -1057,7 +1069,6 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
1057 1069
1058template <class P> 1070template <class P>
1059void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { 1071void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
1060 u32 binding_index = 0;
1061 ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { 1072 ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
1062 const TextureBufferBinding& binding = texture_buffers[stage][index]; 1073 const TextureBufferBinding& binding = texture_buffers[stage][index];
1063 Buffer& buffer = slot_buffers[binding.buffer_id]; 1074 Buffer& buffer = slot_buffers[binding.buffer_id];
@@ -1066,9 +1077,12 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
1066 1077
1067 const u32 offset = buffer.Offset(binding.cpu_addr); 1078 const u32 offset = buffer.Offset(binding.cpu_addr);
1068 const PixelFormat format = binding.format; 1079 const PixelFormat format = binding.format;
1069 if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { 1080 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
1070 runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); 1081 if (((image_texture_buffers[stage] >> index) & 1) != 0) {
1071 ++binding_index; 1082 runtime.BindImageBuffer(buffer, offset, size, format);
1083 } else {
1084 runtime.BindTextureBuffer(buffer, offset, size, format);
1085 }
1072 } else { 1086 } else {
1073 runtime.BindTextureBuffer(buffer, offset, size, format); 1087 runtime.BindTextureBuffer(buffer, offset, size, format);
1074 } 1088 }
@@ -1139,7 +1153,6 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
1139 1153
1140template <class P> 1154template <class P>
1141void BufferCache<P>::BindHostComputeTextureBuffers() { 1155void BufferCache<P>::BindHostComputeTextureBuffers() {
1142 u32 binding_index = 0;
1143 ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { 1156 ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
1144 const TextureBufferBinding& binding = compute_texture_buffers[index]; 1157 const TextureBufferBinding& binding = compute_texture_buffers[index];
1145 Buffer& buffer = slot_buffers[binding.buffer_id]; 1158 Buffer& buffer = slot_buffers[binding.buffer_id];
@@ -1148,9 +1161,12 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
1148 1161
1149 const u32 offset = buffer.Offset(binding.cpu_addr); 1162 const u32 offset = buffer.Offset(binding.cpu_addr);
1150 const PixelFormat format = binding.format; 1163 const PixelFormat format = binding.format;
1151 if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { 1164 if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
1152 runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); 1165 if (((image_compute_texture_buffers >> index) & 1) != 0) {
1153 ++binding_index; 1166 runtime.BindImageBuffer(buffer, offset, size, format);
1167 } else {
1168 runtime.BindTextureBuffer(buffer, offset, size, format);
1169 }
1154 } else { 1170 } else {
1155 runtime.BindTextureBuffer(buffer, offset, size, format); 1171 runtime.BindTextureBuffer(buffer, offset, size, format);
1156 } 1172 }
@@ -1339,11 +1355,10 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
1339 ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { 1355 ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
1340 // Resolve buffer 1356 // Resolve buffer
1341 Binding& binding = compute_storage_buffers[index]; 1357 Binding& binding = compute_storage_buffers[index];
1342 const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); 1358 binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
1343 binding.buffer_id = buffer_id;
1344 // Mark as written if needed 1359 // Mark as written if needed
1345 if (((written_compute_storage_buffers >> index) & 1) != 0) { 1360 if (((written_compute_storage_buffers >> index) & 1) != 0) {
1346 MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); 1361 MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
1347 } 1362 }
1348 }); 1363 });
1349} 1364}
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index c4189fb60..2d0ef1307 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -2,14 +2,18 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
5#include <span> 6#include <span>
6 7
7#include "video_core/buffer_cache/buffer_cache.h" 8#include "video_core/buffer_cache/buffer_cache.h"
8#include "video_core/renderer_opengl/gl_buffer_cache.h" 9#include "video_core/renderer_opengl/gl_buffer_cache.h"
9#include "video_core/renderer_opengl/gl_device.h" 10#include "video_core/renderer_opengl/gl_device.h"
11#include "video_core/renderer_opengl/maxwell_to_gl.h"
10 12
11namespace OpenGL { 13namespace OpenGL {
12namespace { 14namespace {
15using VideoCore::Surface::PixelFormat;
16
13struct BindlessSSBO { 17struct BindlessSSBO {
14 GLuint64EXT address; 18 GLuint64EXT address;
15 GLsizei length; 19 GLsizei length;
@@ -62,6 +66,26 @@ void Buffer::MakeResident(GLenum access) noexcept {
62 glMakeNamedBufferResidentNV(buffer.handle, access); 66 glMakeNamedBufferResidentNV(buffer.handle, access);
63} 67}
64 68
69GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
70 const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
71 return offset == view.offset && size == view.size && format == view.format;
72 })};
73 if (it != views.end()) {
74 return it->texture.handle;
75 }
76 OGLTexture texture;
77 texture.Create(GL_TEXTURE_BUFFER);
78 const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format};
79 glTextureBufferRange(texture.handle, gl_format, buffer.handle, offset, size);
80 views.push_back({
81 .offset = offset,
82 .size = size,
83 .format = format,
84 .texture = std::move(texture),
85 });
86 return views.back().texture.handle;
87}
88
65BufferCacheRuntime::BufferCacheRuntime(const Device& device_) 89BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
66 : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, 90 : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
67 use_assembly_shaders{device.UseAssemblyShaders()}, 91 use_assembly_shaders{device.UseAssemblyShaders()},
@@ -144,7 +168,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff
144 glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, 168 glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
145 static_cast<GLsizeiptr>(size)); 169 static_cast<GLsizeiptr>(size));
146 } else { 170 } else {
147 const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; 171 const GLuint base_binding = graphics_base_uniform_bindings[stage];
148 const GLuint binding = base_binding + binding_index; 172 const GLuint binding = base_binding + binding_index;
149 glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(), 173 glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
150 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); 174 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@@ -181,7 +205,7 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff
181 glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, 205 glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
182 reinterpret_cast<const GLuint*>(&ssbo)); 206 reinterpret_cast<const GLuint*>(&ssbo));
183 } else { 207 } else {
184 const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer; 208 const GLuint base_binding = graphics_base_storage_bindings[stage];
185 const GLuint binding = base_binding + binding_index; 209 const GLuint binding = base_binding + binding_index;
186 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), 210 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
187 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); 211 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@@ -213,4 +237,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer,
213 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); 237 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
214} 238}
215 239
240void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
241 PixelFormat format) {
242 *texture_handles++ = buffer.View(offset, size, format);
243}
244
245void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) {
246 *image_handles++ = buffer.View(offset, size, format);
247}
248
216} // namespace OpenGL 249} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index ddcce5e97..4986c65fd 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -32,6 +32,8 @@ public:
32 32
33 void MakeResident(GLenum access) noexcept; 33 void MakeResident(GLenum access) noexcept;
34 34
35 [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
36
35 [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { 37 [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
36 return address; 38 return address;
37 } 39 }
@@ -41,9 +43,17 @@ public:
41 } 43 }
42 44
43private: 45private:
46 struct BufferView {
47 u32 offset;
48 u32 size;
49 VideoCore::Surface::PixelFormat format;
50 OGLTexture texture;
51 };
52
44 GLuint64EXT address = 0; 53 GLuint64EXT address = 0;
45 OGLBuffer buffer; 54 OGLBuffer buffer;
46 GLenum current_residency_access = GL_NONE; 55 GLenum current_residency_access = GL_NONE;
56 std::vector<BufferView> views;
47}; 57};
48 58
49class BufferCacheRuntime { 59class BufferCacheRuntime {
@@ -75,13 +85,19 @@ public:
75 85
76 void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); 86 void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
77 87
88 void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
89 VideoCore::Surface::PixelFormat format);
90
91 void BindImageBuffer(Buffer& buffer, u32 offset, u32 size,
92 VideoCore::Surface::PixelFormat format);
93
78 void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { 94 void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
79 if (use_assembly_shaders) { 95 if (use_assembly_shaders) {
80 const GLuint handle = fast_uniforms[stage][binding_index].handle; 96 const GLuint handle = fast_uniforms[stage][binding_index].handle;
81 const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); 97 const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
82 glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); 98 glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
83 } else { 99 } else {
84 const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; 100 const GLuint base_binding = graphics_base_uniform_bindings[stage];
85 const GLuint binding = base_binding + binding_index; 101 const GLuint binding = base_binding + binding_index;
86 glBindBufferRange(GL_UNIFORM_BUFFER, binding, 102 glBindBufferRange(GL_UNIFORM_BUFFER, binding,
87 fast_uniforms[stage][binding_index].handle, 0, 103 fast_uniforms[stage][binding_index].handle, 0,
@@ -103,7 +119,7 @@ public:
103 119
104 std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { 120 std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
105 const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size)); 121 const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
106 const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; 122 const GLuint base_binding = graphics_base_uniform_bindings[stage];
107 const GLuint binding = base_binding + binding_index; 123 const GLuint binding = base_binding + binding_index;
108 glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(), 124 glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
109 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); 125 static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@@ -118,6 +134,19 @@ public:
118 return has_fast_buffer_sub_data; 134 return has_fast_buffer_sub_data;
119 } 135 }
120 136
137 void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) {
138 graphics_base_uniform_bindings = bindings;
139 }
140
141 void SetBaseStorageBindings(const std::array<GLuint, 5>& bindings) {
142 graphics_base_storage_bindings = bindings;
143 }
144
145 void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) {
146 texture_handles = texture_handles_;
147 image_handles = image_handles_;
148 }
149
121private: 150private:
122 static constexpr std::array PABO_LUT{ 151 static constexpr std::array PABO_LUT{
123 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, 152 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
@@ -133,6 +162,11 @@ private:
133 162
134 u32 max_attributes = 0; 163 u32 max_attributes = 0;
135 164
165 std::array<GLuint, 5> graphics_base_uniform_bindings{};
166 std::array<GLuint, 5> graphics_base_storage_bindings{};
167 GLuint* texture_handles = nullptr;
168 GLuint* image_handles = nullptr;
169
136 std::optional<StreamBuffer> stream_buffer; 170 std::optional<StreamBuffer> stream_buffer;
137 171
138 std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, 172 std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
@@ -155,8 +189,8 @@ struct BufferCacheParams {
155 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; 189 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
156 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; 190 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
157 static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; 191 static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
158 static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = true;
159 static constexpr bool USE_MEMORY_MAPS = false; 192 static constexpr bool USE_MEMORY_MAPS = false;
193 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
160}; 194};
161 195
162using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; 196using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp
new file mode 100644
index 000000000..d5ef65439
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_compute_program.cpp
@@ -0,0 +1,178 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6
7#include "common/cityhash.h"
8#include "video_core/renderer_opengl/gl_compute_program.h"
9#include "video_core/renderer_opengl/gl_shader_manager.h"
10
11namespace OpenGL {
12
13using Shader::ImageBufferDescriptor;
14using Tegra::Texture::TexturePair;
15using VideoCommon::ImageId;
16
17constexpr u32 MAX_TEXTURES = 64;
18constexpr u32 MAX_IMAGES = 16;
19
20size_t ComputeProgramKey::Hash() const noexcept {
21 return static_cast<size_t>(
22 Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
23}
24
25bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept {
26 return std::memcmp(this, &rhs, sizeof *this) == 0;
27}
28
29ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_,
30 Tegra::MemoryManager& gpu_memory_,
31 Tegra::Engines::KeplerCompute& kepler_compute_,
32 ProgramManager& program_manager_, OGLProgram program_,
33 const Shader::Info& info_)
34 : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_},
35 kepler_compute{kepler_compute_},
36 program_manager{program_manager_}, program{std::move(program_)}, info{info_} {
37 for (const auto& desc : info.texture_buffer_descriptors) {
38 num_texture_buffers += desc.count;
39 }
40 for (const auto& desc : info.image_buffer_descriptors) {
41 num_image_buffers += desc.count;
42 }
43 u32 num_textures = num_texture_buffers;
44 for (const auto& desc : info.texture_descriptors) {
45 num_textures += desc.count;
46 }
47 ASSERT(num_textures <= MAX_TEXTURES);
48
49 u32 num_images = num_image_buffers;
50 for (const auto& desc : info.image_descriptors) {
51 num_images += desc.count;
52 }
53 ASSERT(num_images <= MAX_IMAGES);
54}
55
56void ComputeProgram::Configure() {
57 buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask);
58 buffer_cache.UnbindComputeStorageBuffers();
59 size_t ssbo_index{};
60 for (const auto& desc : info.storage_buffers_descriptors) {
61 ASSERT(desc.count == 1);
62 buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
63 desc.is_written);
64 ++ssbo_index;
65 }
66 texture_cache.SynchronizeComputeDescriptors();
67
68 std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
69 boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
70 std::array<GLuint, MAX_TEXTURES> samplers;
71 std::array<GLuint, MAX_TEXTURES> textures;
72 std::array<GLuint, MAX_IMAGES> images;
73 GLsizei sampler_binding{};
74 GLsizei texture_binding{};
75 GLsizei image_binding{};
76
77 const auto& qmd{kepler_compute.launch_description};
78 const auto& cbufs{qmd.const_buffer_config};
79 const bool via_header_index{qmd.linked_tsc != 0};
80 const auto read_handle{[&](const auto& desc, u32 index) {
81 ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
82 const u32 index_offset{index << desc.size_shift};
83 const u32 offset{desc.cbuf_offset + index_offset};
84 const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
85 if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
86 std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
87 if (desc.has_secondary) {
88 ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
89 const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
90 const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
91 secondary_offset};
92 const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
93 const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
94 return TexturePair(lhs_raw | rhs_raw, via_header_index);
95 }
96 }
97 return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
98 }};
99 const auto add_image{[&](const auto& desc) {
100 for (u32 index = 0; index < desc.count; ++index) {
101 const auto handle{read_handle(desc, index)};
102 image_view_indices.push_back(handle.first);
103 }
104 }};
105 for (const auto& desc : info.texture_buffer_descriptors) {
106 for (u32 index = 0; index < desc.count; ++index) {
107 const auto handle{read_handle(desc, index)};
108 image_view_indices.push_back(handle.first);
109 samplers[sampler_binding++] = 0;
110 }
111 }
112 std::ranges::for_each(info.image_buffer_descriptors, add_image);
113 for (const auto& desc : info.texture_descriptors) {
114 for (u32 index = 0; index < desc.count; ++index) {
115 const auto handle{read_handle(desc, index)};
116 image_view_indices.push_back(handle.first);
117
118 Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
119 samplers[sampler_binding++] = sampler->Handle();
120 }
121 }
122 std::ranges::for_each(info.image_descriptors, add_image);
123
124 const std::span indices_span(image_view_indices.data(), image_view_indices.size());
125 texture_cache.FillComputeImageViews(indices_span, image_view_ids);
126
127 buffer_cache.UnbindComputeTextureBuffers();
128 size_t texbuf_index{};
129 const auto add_buffer{[&](const auto& desc) {
130 constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
131 for (u32 i = 0; i < desc.count; ++i) {
132 bool is_written{false};
133 if constexpr (is_image) {
134 is_written = desc.is_written;
135 }
136 ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])};
137 buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
138 image_view.BufferSize(), image_view.format,
139 is_written, is_image);
140 ++texbuf_index;
141 }
142 }};
143 std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
144 std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
145
146 buffer_cache.UpdateComputeBuffers();
147
148 buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
149 buffer_cache.BindHostComputeBuffers();
150
151 const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers};
152 texture_binding += num_texture_buffers;
153 image_binding += num_image_buffers;
154
155 for (const auto& desc : info.texture_descriptors) {
156 for (u32 index = 0; index < desc.count; ++index) {
157 ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
158 textures[texture_binding++] = image_view.Handle(desc.type);
159 }
160 }
161 for (const auto& desc : info.image_descriptors) {
162 for (u32 index = 0; index < desc.count; ++index) {
163 ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
164 images[image_binding++] = image_view.Handle(desc.type);
165 }
166 }
167 if (texture_binding != 0) {
168 ASSERT(texture_binding == sampler_binding);
169 glBindTextures(0, texture_binding, textures.data());
170 glBindSamplers(0, sampler_binding, samplers.data());
171 }
172 if (image_binding != 0) {
173 glBindImageTextures(0, image_binding, images.data());
174 }
175 program_manager.BindProgram(program.handle);
176}
177
178} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_compute_program.h b/src/video_core/renderer_opengl/gl_compute_program.h
new file mode 100644
index 000000000..64a75d44d
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_compute_program.h
@@ -0,0 +1,83 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <type_traits>
9#include <utility>
10
11#include "common/common_types.h"
12#include "shader_recompiler/shader_info.h"
13#include "video_core/renderer_opengl/gl_buffer_cache.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h"
15#include "video_core/renderer_opengl/gl_texture_cache.h"
16
17namespace Tegra {
18class MemoryManager;
19}
20
21namespace Tegra::Engines {
22class KeplerCompute;
23}
24
25namespace Shader {
26struct Info;
27}
28
29namespace OpenGL {
30
31class ProgramManager;
32
33struct ComputeProgramKey {
34 u64 unique_hash;
35 u32 shared_memory_size;
36 std::array<u32, 3> workgroup_size;
37
38 size_t Hash() const noexcept;
39
40 bool operator==(const ComputeProgramKey&) const noexcept;
41
42 bool operator!=(const ComputeProgramKey& rhs) const noexcept {
43 return !operator==(rhs);
44 }
45};
46static_assert(std::has_unique_object_representations_v<ComputeProgramKey>);
47static_assert(std::is_trivially_copyable_v<ComputeProgramKey>);
48static_assert(std::is_trivially_constructible_v<ComputeProgramKey>);
49
50class ComputeProgram {
51public:
52 explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_,
53 Tegra::MemoryManager& gpu_memory_,
54 Tegra::Engines::KeplerCompute& kepler_compute_,
55 ProgramManager& program_manager_, OGLProgram program_,
56 const Shader::Info& info_);
57
58 void Configure();
59
60private:
61 TextureCache& texture_cache;
62 BufferCache& buffer_cache;
63 Tegra::MemoryManager& gpu_memory;
64 Tegra::Engines::KeplerCompute& kepler_compute;
65 ProgramManager& program_manager;
66
67 OGLProgram program;
68 Shader::Info info;
69
70 u32 num_texture_buffers{};
71 u32 num_image_buffers{};
72};
73
74} // namespace OpenGL
75
76namespace std {
77template <>
78struct hash<OpenGL::ComputeProgramKey> {
79 size_t operator()(const OpenGL::ComputeProgramKey& k) const noexcept {
80 return k.Hash();
81 }
82};
83} // namespace std
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 3b00614e7..18bbc4c1f 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -22,34 +22,11 @@
22 22
23namespace OpenGL { 23namespace OpenGL {
24namespace { 24namespace {
25// One uniform block is reserved for emulation purposes
26constexpr u32 ReservedUniformBlocks = 1;
27
28constexpr u32 NumStages = 5;
29
30constexpr std::array LIMIT_UBOS = { 25constexpr std::array LIMIT_UBOS = {
31 GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, 26 GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
32 GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, 27 GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
33 GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS, 28 GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
34}; 29};
35constexpr std::array LIMIT_SSBOS = {
36 GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
37 GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
38 GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
39};
40constexpr std::array LIMIT_SAMPLERS = {
41 GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
42 GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
43 GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
44 GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
45 GL_MAX_TEXTURE_IMAGE_UNITS,
46 GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
47};
48constexpr std::array LIMIT_IMAGES = {
49 GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
50 GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
51 GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
52};
53 30
54template <typename T> 31template <typename T>
55T GetInteger(GLenum pname) { 32T GetInteger(GLenum pname) {
@@ -82,15 +59,6 @@ bool HasExtension(std::span<const std::string_view> extensions, std::string_view
82 return std::ranges::find(extensions, extension) != extensions.end(); 59 return std::ranges::find(extensions, extension) != extensions.end();
83} 60}
84 61
85u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
86 ASSERT(num >= amount);
87 if (limit) {
88 amount = std::min(amount, GetInteger<u32>(*limit));
89 }
90 num -= amount;
91 return std::exchange(base, base + amount);
92}
93
94std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept { 62std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
95 std::array<u32, Tegra::Engines::MaxShaderTypes> max; 63 std::array<u32, Tegra::Engines::MaxShaderTypes> max;
96 std::ranges::transform(LIMIT_UBOS, max.begin(), 64 std::ranges::transform(LIMIT_UBOS, max.begin(),
@@ -98,62 +66,6 @@ std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcep
98 return max; 66 return max;
99} 67}
100 68
101std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
102 std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
103
104 static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4};
105 const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS);
106 const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
107 const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS);
108
109 u32 num_ubos = total_ubos - ReservedUniformBlocks;
110 u32 num_ssbos = total_ssbos;
111 u32 num_samplers = total_samplers;
112
113 u32 base_ubo = ReservedUniformBlocks;
114 u32 base_ssbo = 0;
115 u32 base_samplers = 0;
116
117 for (std::size_t i = 0; i < NumStages; ++i) {
118 const std::size_t stage = stage_swizzle[i];
119 bindings[stage] = {
120 Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
121 Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
122 Extract(base_samplers, num_samplers, total_samplers / NumStages,
123 LIMIT_SAMPLERS[stage])};
124 }
125
126 u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
127 u32 base_images = 0;
128
129 // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
130 // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
131 // fragment stage, and at least 1 for the rest of the stages.
132 // So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
133
134 // Reserve at least 4 image bindings on the fragment stage.
135 bindings[4].image =
136 Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
137
138 // This is guaranteed to be at least 1.
139 const u32 total_extracted_images = num_images / (NumStages - 1);
140
141 // Reserve the other image bindings.
142 for (std::size_t i = 0; i < NumStages; ++i) {
143 const std::size_t stage = stage_swizzle[i];
144 if (stage == 4) {
145 continue;
146 }
147 bindings[stage].image =
148 Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
149 }
150
151 // Compute doesn't care about any of this.
152 bindings[5] = {0, 0, 0, 0};
153
154 return bindings;
155}
156
157bool IsASTCSupported() { 69bool IsASTCSupported() {
158 static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; 70 static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
159 static constexpr std::array formats = { 71 static constexpr std::array formats = {
@@ -225,7 +137,6 @@ Device::Device() {
225 } 137 }
226 138
227 max_uniform_buffers = BuildMaxUniformBuffers(); 139 max_uniform_buffers = BuildMaxUniformBuffers();
228 base_bindings = BuildBaseBindings();
229 uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 140 uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
230 shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); 141 shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
231 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); 142 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 2c2b13767..152a3acd3 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -12,13 +12,6 @@ namespace OpenGL {
12 12
13class Device { 13class Device {
14public: 14public:
15 struct BaseBindings {
16 u32 uniform_buffer{};
17 u32 shader_storage_buffer{};
18 u32 sampler{};
19 u32 image{};
20 };
21
22 explicit Device(); 15 explicit Device();
23 explicit Device(std::nullptr_t); 16 explicit Device(std::nullptr_t);
24 17
@@ -28,14 +21,6 @@ public:
28 return max_uniform_buffers[static_cast<std::size_t>(shader_type)]; 21 return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
29 } 22 }
30 23
31 const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
32 return base_bindings[stage_index];
33 }
34
35 const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept {
36 return GetBaseBindings(static_cast<std::size_t>(shader_type));
37 }
38
39 size_t GetUniformBufferAlignment() const { 24 size_t GetUniformBufferAlignment() const {
40 return uniform_buffer_alignment; 25 return uniform_buffer_alignment;
41 } 26 }
@@ -134,7 +119,6 @@ private:
134 119
135 std::string vendor_name; 120 std::string vendor_name;
136 std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; 121 std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
137 std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
138 size_t uniform_buffer_alignment{}; 122 size_t uniform_buffer_alignment{};
139 size_t shader_storage_alignment{}; 123 size_t shader_storage_alignment{};
140 u32 max_vertex_attributes{}; 124 u32 max_vertex_attributes{};
diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp
new file mode 100644
index 000000000..fd0958719
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp
@@ -0,0 +1,296 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <cstring>
6
7#include "common/cityhash.h"
8#include "shader_recompiler/shader_info.h"
9#include "video_core/renderer_opengl/gl_graphics_program.h"
10#include "video_core/renderer_opengl/gl_shader_manager.h"
11#include "video_core/renderer_opengl/gl_state_tracker.h"
12#include "video_core/texture_cache/texture_cache.h"
13
14namespace OpenGL {
15
16using Shader::ImageBufferDescriptor;
17using Tegra::Texture::TexturePair;
18using VideoCommon::ImageId;
19
20constexpr u32 MAX_TEXTURES = 64;
21constexpr u32 MAX_IMAGES = 8;
22
23size_t GraphicsProgramKey::Hash() const noexcept {
24 return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size()));
25}
26
27bool GraphicsProgramKey::operator==(const GraphicsProgramKey& rhs) const noexcept {
28 return std::memcmp(this, &rhs, Size()) == 0;
29}
30
31GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_,
32 Tegra::MemoryManager& gpu_memory_,
33 Tegra::Engines::Maxwell3D& maxwell3d_,
34 ProgramManager& program_manager_, StateTracker& state_tracker_,
35 OGLProgram program_,
36 const std::array<const Shader::Info*, 5>& infos)
37 : texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
38 gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_},
39 state_tracker{state_tracker_}, program{std::move(program_)} {
40 std::ranges::transform(infos, stage_infos.begin(),
41 [](const Shader::Info* info) { return info ? *info : Shader::Info{}; });
42
43 u32 num_textures{};
44 u32 num_images{};
45 for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) {
46 const auto& info{stage_infos[stage]};
47 base_uniform_bindings[stage + 1] = base_uniform_bindings[stage];
48 base_storage_bindings[stage + 1] = base_storage_bindings[stage];
49 for (const auto& desc : info.constant_buffer_descriptors) {
50 base_uniform_bindings[stage + 1] += desc.count;
51 }
52 for (const auto& desc : info.storage_buffers_descriptors) {
53 base_storage_bindings[stage + 1] += desc.count;
54 }
55 for (const auto& desc : info.texture_buffer_descriptors) {
56 num_texture_buffers[stage] += desc.count;
57 num_textures += desc.count;
58 }
59 for (const auto& desc : info.image_buffer_descriptors) {
60 num_image_buffers[stage] += desc.count;
61 num_images += desc.count;
62 }
63 for (const auto& desc : info.texture_descriptors) {
64 num_textures += desc.count;
65 }
66 for (const auto& desc : info.image_descriptors) {
67 num_images += desc.count;
68 }
69 }
70 ASSERT(num_textures <= MAX_TEXTURES);
71 ASSERT(num_images <= MAX_IMAGES);
72}
73
74struct Spec {
75 static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true};
76 static constexpr bool has_storage_buffers = true;
77 static constexpr bool has_texture_buffers = true;
78 static constexpr bool has_image_buffers = true;
79 static constexpr bool has_images = true;
80};
81
82void GraphicsProgram::Configure(bool is_indexed) {
83 std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
84 std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
85 std::array<GLuint, MAX_TEXTURES> samplers;
86 size_t image_view_index{};
87 GLsizei sampler_binding{};
88
89 texture_cache.SynchronizeGraphicsDescriptors();
90
91 buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings);
92 buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings);
93
94 const auto& regs{maxwell3d.regs};
95 const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
96 const auto config_stage{[&](size_t stage) {
97 const Shader::Info& info{stage_infos[stage]};
98 buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask);
99 buffer_cache.UnbindGraphicsStorageBuffers(stage);
100 if constexpr (Spec::has_storage_buffers) {
101 size_t ssbo_index{};
102 for (const auto& desc : info.storage_buffers_descriptors) {
103 ASSERT(desc.count == 1);
104 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index,
105 desc.cbuf_offset, desc.is_written);
106 ++ssbo_index;
107 }
108 }
109 const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
110 const auto read_handle{[&](const auto& desc, u32 index) {
111 ASSERT(cbufs[desc.cbuf_index].enabled);
112 const u32 index_offset{index << desc.size_shift};
113 const u32 offset{desc.cbuf_offset + index_offset};
114 const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset};
115 if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
116 std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
117 if (desc.has_secondary) {
118 ASSERT(cbufs[desc.secondary_cbuf_index].enabled);
119 const u32 second_offset{desc.secondary_cbuf_offset + index_offset};
120 const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address +
121 second_offset};
122 const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
123 const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
124 const u32 raw{lhs_raw | rhs_raw};
125 return TexturePair(raw, via_header_index);
126 }
127 }
128 return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
129 }};
130 const auto add_image{[&](const auto& desc) {
131 for (u32 index = 0; index < desc.count; ++index) {
132 const auto handle{read_handle(desc, index)};
133 image_view_indices[image_view_index++] = handle.first;
134 }
135 }};
136 if constexpr (Spec::has_texture_buffers) {
137 for (const auto& desc : info.texture_buffer_descriptors) {
138 for (u32 index = 0; index < desc.count; ++index) {
139 const auto handle{read_handle(desc, index)};
140 image_view_indices[image_view_index++] = handle.first;
141 samplers[sampler_binding++] = 0;
142 }
143 }
144 }
145 if constexpr (Spec::has_image_buffers) {
146 for (const auto& desc : info.image_buffer_descriptors) {
147 add_image(desc);
148 }
149 }
150 for (const auto& desc : info.texture_descriptors) {
151 for (u32 index = 0; index < desc.count; ++index) {
152 const auto handle{read_handle(desc, index)};
153 image_view_indices[image_view_index++] = handle.first;
154
155 Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
156 samplers[sampler_binding++] = sampler->Handle();
157 }
158 }
159 if constexpr (Spec::has_images) {
160 for (const auto& desc : info.image_descriptors) {
161 add_image(desc);
162 }
163 }
164 }};
165 if constexpr (Spec::enabled_stages[0]) {
166 config_stage(0);
167 }
168 if constexpr (Spec::enabled_stages[1]) {
169 config_stage(1);
170 }
171 if constexpr (Spec::enabled_stages[2]) {
172 config_stage(2);
173 }
174 if constexpr (Spec::enabled_stages[3]) {
175 config_stage(3);
176 }
177 if constexpr (Spec::enabled_stages[4]) {
178 config_stage(4);
179 }
180 const std::span indices_span(image_view_indices.data(), image_view_index);
181 texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
182
183 ImageId* texture_buffer_index{image_view_ids.data()};
184 const auto bind_stage_info{[&](size_t stage) {
185 size_t index{};
186 const auto add_buffer{[&](const auto& desc) {
187 constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
188 for (u32 i = 0; i < desc.count; ++i) {
189 bool is_written{false};
190 if constexpr (is_image) {
191 is_written = desc.is_written;
192 }
193 ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
194 buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
195 image_view.BufferSize(), image_view.format,
196 is_written, is_image);
197 ++index;
198 ++texture_buffer_index;
199 }
200 }};
201 const Shader::Info& info{stage_infos[stage]};
202 buffer_cache.UnbindGraphicsTextureBuffers(stage);
203
204 if constexpr (Spec::has_texture_buffers) {
205 for (const auto& desc : info.texture_buffer_descriptors) {
206 add_buffer(desc);
207 }
208 }
209 if constexpr (Spec::has_image_buffers) {
210 for (const auto& desc : info.image_buffer_descriptors) {
211 add_buffer(desc);
212 }
213 }
214 for (const auto& desc : info.texture_descriptors) {
215 texture_buffer_index += desc.count;
216 }
217 if constexpr (Spec::has_images) {
218 for (const auto& desc : info.image_descriptors) {
219 texture_buffer_index += desc.count;
220 }
221 }
222 }};
223 if constexpr (Spec::enabled_stages[0]) {
224 bind_stage_info(0);
225 }
226 if constexpr (Spec::enabled_stages[1]) {
227 bind_stage_info(1);
228 }
229 if constexpr (Spec::enabled_stages[2]) {
230 bind_stage_info(2);
231 }
232 if constexpr (Spec::enabled_stages[3]) {
233 bind_stage_info(3);
234 }
235 if constexpr (Spec::enabled_stages[4]) {
236 bind_stage_info(4);
237 }
238 buffer_cache.UpdateGraphicsBuffers(is_indexed);
239 buffer_cache.BindHostGeometryBuffers(is_indexed);
240
241 const ImageId* views_it{image_view_ids.data()};
242 GLsizei texture_binding = 0;
243 GLsizei image_binding = 0;
244 std::array<GLuint, MAX_TEXTURES> textures;
245 std::array<GLuint, MAX_IMAGES> images;
246 const auto prepare_stage{[&](size_t stage) {
247 buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]);
248 buffer_cache.BindHostStageBuffers(stage);
249
250 texture_binding += num_texture_buffers[stage];
251 image_binding += num_image_buffers[stage];
252
253 const auto& info{stage_infos[stage]};
254 for (const auto& desc : info.texture_descriptors) {
255 for (u32 index = 0; index < desc.count; ++index) {
256 ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
257 textures[texture_binding++] = image_view.Handle(desc.type);
258 }
259 }
260 for (const auto& desc : info.image_descriptors) {
261 for (u32 index = 0; index < desc.count; ++index) {
262 ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
263 images[image_binding++] = image_view.Handle(desc.type);
264 }
265 }
266 }};
267 if constexpr (Spec::enabled_stages[0]) {
268 prepare_stage(0);
269 }
270 if constexpr (Spec::enabled_stages[1]) {
271 prepare_stage(1);
272 }
273 if constexpr (Spec::enabled_stages[2]) {
274 prepare_stage(2);
275 }
276 if constexpr (Spec::enabled_stages[3]) {
277 prepare_stage(3);
278 }
279 if constexpr (Spec::enabled_stages[4]) {
280 prepare_stage(4);
281 }
282 if (texture_binding != 0) {
283 ASSERT(texture_binding == sampler_binding);
284 glBindTextures(0, texture_binding, textures.data());
285 glBindSamplers(0, sampler_binding, samplers.data());
286 }
287 if (image_binding != 0) {
288 glBindImageTextures(0, image_binding, images.data());
289 }
290 texture_cache.UpdateRenderTargets(false);
291
292 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
293 program_manager.BindProgram(program.handle);
294}
295
296} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h
new file mode 100644
index 000000000..5adf3f41e
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_graphics_program.h
@@ -0,0 +1,105 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <type_traits>
9#include <utility>
10
11#include "common/bit_field.h"
12#include "common/common_types.h"
13#include "shader_recompiler/shader_info.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/memory_manager.h"
16#include "video_core/renderer_opengl/gl_buffer_cache.h"
17#include "video_core/renderer_opengl/gl_resource_manager.h"
18#include "video_core/renderer_opengl/gl_texture_cache.h"
19
20namespace OpenGL {
21
22class ProgramManager;
23
24using Maxwell = Tegra::Engines::Maxwell3D::Regs;
25
26struct GraphicsProgramKey {
27 struct TransformFeedbackState {
28 struct Layout {
29 u32 stream;
30 u32 varying_count;
31 u32 stride;
32 };
33 std::array<Layout, Maxwell::NumTransformFeedbackBuffers> layouts;
34 std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> varyings;
35 };
36
37 std::array<u64, 6> unique_hashes;
38 union {
39 u32 raw;
40 BitField<0, 1, u32> xfb_enabled;
41 BitField<1, 1, u32> early_z;
42 BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
43 BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive;
44 BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing;
45 BitField<10, 1, u32> tessellation_clockwise;
46 };
47 std::array<u32, 3> padding;
48 TransformFeedbackState xfb_state;
49
50 size_t Hash() const noexcept;
51
52 bool operator==(const GraphicsProgramKey&) const noexcept;
53
54 bool operator!=(const GraphicsProgramKey& rhs) const noexcept {
55 return !operator==(rhs);
56 }
57
58 [[nodiscard]] size_t Size() const noexcept {
59 if (xfb_enabled != 0) {
60 return sizeof(GraphicsProgramKey);
61 } else {
62 return offsetof(GraphicsProgramKey, padding);
63 }
64 }
65};
66static_assert(std::has_unique_object_representations_v<GraphicsProgramKey>);
67static_assert(std::is_trivially_copyable_v<GraphicsProgramKey>);
68static_assert(std::is_trivially_constructible_v<GraphicsProgramKey>);
69
70class GraphicsProgram {
71public:
72 explicit GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_,
73 Tegra::MemoryManager& gpu_memory_,
74 Tegra::Engines::Maxwell3D& maxwell3d_,
75 ProgramManager& program_manager_, StateTracker& state_tracker_,
76 OGLProgram program_, const std::array<const Shader::Info*, 5>& infos);
77
78 void Configure(bool is_indexed);
79
80private:
81 TextureCache& texture_cache;
82 BufferCache& buffer_cache;
83 Tegra::MemoryManager& gpu_memory;
84 Tegra::Engines::Maxwell3D& maxwell3d;
85 ProgramManager& program_manager;
86 StateTracker& state_tracker;
87
88 OGLProgram program;
89 std::array<Shader::Info, 5> stage_infos{};
90 std::array<u32, 5> base_uniform_bindings{};
91 std::array<u32, 5> base_storage_bindings{};
92 std::array<u32, 5> num_texture_buffers{};
93 std::array<u32, 5> num_image_buffers{};
94};
95
96} // namespace OpenGL
97
98namespace std {
99template <>
100struct hash<OpenGL::GraphicsProgramKey> {
101 size_t operator()(const OpenGL::GraphicsProgramKey& k) const noexcept {
102 return k.Hash();
103 }
104};
105} // namespace std
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index dd1937863..e527b76ba 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -98,7 +98,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
98 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), 98 texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
99 buffer_cache_runtime(device), 99 buffer_cache_runtime(device),
100 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), 100 buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
101 shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), 101 shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache,
102 buffer_cache, program_manager, state_tracker),
102 query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), 103 query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
103 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} 104 fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
104 105
@@ -246,12 +247,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
246 247
247 SyncState(); 248 SyncState();
248 249
249 // Setup shaders and their used resources. 250 GraphicsProgram* const program{shader_cache.CurrentGraphicsProgram()};
250 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
251 251
252 texture_cache.UpdateRenderTargets(false); 252 std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
253 state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); 253 program->Configure(is_indexed);
254 program_manager.BindGraphicsPipeline();
255 254
256 const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); 255 const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
257 BeginTransformFeedback(primitive_mode); 256 BeginTransformFeedback(primitive_mode);
@@ -293,7 +292,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
293 num_instances, base_instance); 292 num_instances, base_instance);
294 } 293 }
295 } 294 }
296
297 EndTransformFeedback(); 295 EndTransformFeedback();
298 296
299 ++num_queued_commands; 297 ++num_queued_commands;
@@ -302,7 +300,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
302} 300}
303 301
304void RasterizerOpenGL::DispatchCompute() { 302void RasterizerOpenGL::DispatchCompute() {
305 UNREACHABLE_MSG("Not implemented"); 303 ComputeProgram* const program{shader_cache.CurrentComputeProgram()};
304 if (!program) {
305 return;
306 }
307 program->Configure();
308 const auto& qmd{kepler_compute.launch_description};
309 glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
310 ++num_queued_commands;
306} 311}
307 312
308void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { 313void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@@ -515,7 +520,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
515 // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); 520 // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
516 // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); 521 // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
517 522
518 screen_info.display_texture = image_view->Handle(ImageViewType::e2D); 523 screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D);
519 screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); 524 screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
520 return true; 525 return true;
521} 526}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index c3e490b40..c9ca1f005 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -16,6 +16,11 @@
16#include "common/scope_exit.h" 16#include "common/scope_exit.h"
17#include "core/core.h" 17#include "core/core.h"
18#include "core/frontend/emu_window.h" 18#include "core/frontend/emu_window.h"
19#include "shader_recompiler/backend/spirv/emit_spirv.h"
20#include "shader_recompiler/frontend/ir/program.h"
21#include "shader_recompiler/frontend/maxwell/control_flow.h"
22#include "shader_recompiler/frontend/maxwell/program.h"
23#include "shader_recompiler/profile.h"
19#include "video_core/engines/kepler_compute.h" 24#include "video_core/engines/kepler_compute.h"
20#include "video_core/engines/maxwell_3d.h" 25#include "video_core/engines/maxwell_3d.h"
21#include "video_core/engines/shader_type.h" 26#include "video_core/engines/shader_type.h"
@@ -25,17 +30,281 @@
25#include "video_core/renderer_opengl/gl_shader_cache.h" 30#include "video_core/renderer_opengl/gl_shader_cache.h"
26#include "video_core/renderer_opengl/gl_state_tracker.h" 31#include "video_core/renderer_opengl/gl_state_tracker.h"
27#include "video_core/shader_cache.h" 32#include "video_core/shader_cache.h"
33#include "video_core/shader_environment.h"
28#include "video_core/shader_notify.h" 34#include "video_core/shader_notify.h"
29 35
30namespace OpenGL { 36namespace OpenGL {
37namespace {
38// FIXME: Move this somewhere else
39const Shader::Profile profile{
40 .supported_spirv = 0x00010000,
41
42 .unified_descriptor_binding = false,
43 .support_descriptor_aliasing = false,
44 .support_int8 = false,
45 .support_int16 = false,
46 .support_vertex_instance_id = true,
47 .support_float_controls = false,
48 .support_separate_denorm_behavior = false,
49 .support_separate_rounding_mode = false,
50 .support_fp16_denorm_preserve = false,
51 .support_fp32_denorm_preserve = false,
52 .support_fp16_denorm_flush = false,
53 .support_fp32_denorm_flush = false,
54 .support_fp16_signed_zero_nan_preserve = false,
55 .support_fp32_signed_zero_nan_preserve = false,
56 .support_fp64_signed_zero_nan_preserve = false,
57 .support_explicit_workgroup_layout = false,
58 .support_vote = true,
59 .support_viewport_index_layer_non_geometry = true,
60 .support_viewport_mask = true,
61 .support_typeless_image_loads = true,
62 .support_demote_to_helper_invocation = false,
63 .warp_size_potentially_larger_than_guest = true,
64 .support_int64_atomics = false,
65 .lower_left_origin_mode = true,
66
67 .has_broken_spirv_clamp = true,
68 .has_broken_unsigned_image_offsets = true,
69 .has_broken_signed_operations = true,
70 .ignore_nan_fp_comparisons = true,
71
72 .generic_input_types = {},
73 .convert_depth_mode = false,
74 .force_early_z = false,
75
76 .tess_primitive = {},
77 .tess_spacing = {},
78 .tess_clockwise = false,
79
80 .input_topology = Shader::InputTopology::Triangles,
81
82 .fixed_state_point_size = std::nullopt,
83
84 .alpha_test_func = Shader::CompareFunction::Always,
85 .alpha_test_reference = 0.0f,
86
87 .y_negate = false,
88
89 .xfb_varyings = {},
90};
91
92using Shader::Backend::SPIRV::EmitSPIRV;
93using Shader::Maxwell::TranslateProgram;
94using VideoCommon::ComputeEnvironment;
95using VideoCommon::GraphicsEnvironment;
96
97template <typename Container>
98auto MakeSpan(Container& container) {
99 return std::span(container.data(), container.size());
100}
101
102void AddShader(GLenum stage, GLuint program, std::span<const u32> code) {
103 OGLShader shader;
104 shader.handle = glCreateShader(stage);
105
106 glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(),
107 static_cast<GLsizei>(code.size_bytes()));
108 glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr);
109 glAttachShader(program, shader.handle);
110 if (!Settings::values.renderer_debug) {
111 return;
112 }
113 GLint shader_status{};
114 glGetShaderiv(shader.handle, GL_COMPILE_STATUS, &shader_status);
115 if (shader_status == GL_FALSE) {
116 LOG_ERROR(Render_OpenGL, "Failed to build shader");
117 }
118 GLint log_length{};
119 glGetShaderiv(shader.handle, GL_INFO_LOG_LENGTH, &log_length);
120 if (log_length == 0) {
121 return;
122 }
123 std::string log(log_length, 0);
124 glGetShaderInfoLog(shader.handle, log_length, nullptr, log.data());
125 if (shader_status == GL_FALSE) {
126 LOG_ERROR(Render_OpenGL, "{}", log);
127 } else {
128 LOG_WARNING(Render_OpenGL, "{}", log);
129 }
130}
131
132void LinkProgram(GLuint program) {
133 glLinkProgram(program);
134 if (!Settings::values.renderer_debug) {
135 return;
136 }
137 GLint link_status{};
138 glGetProgramiv(program, GL_LINK_STATUS, &link_status);
139
140 GLint log_length{};
141 glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length);
142 if (log_length == 0) {
143 return;
144 }
145 std::string log(log_length, 0);
146 glGetProgramInfoLog(program, log_length, nullptr, log.data());
147 if (link_status == GL_FALSE) {
148 LOG_ERROR(Render_OpenGL, "{}", log);
149 } else {
150 LOG_WARNING(Render_OpenGL, "{}", log);
151 }
152}
153
154GLenum Stage(size_t stage_index) {
155 switch (stage_index) {
156 case 0:
157 return GL_VERTEX_SHADER;
158 case 1:
159 return GL_TESS_CONTROL_SHADER;
160 case 2:
161 return GL_TESS_EVALUATION_SHADER;
162 case 3:
163 return GL_GEOMETRY_SHADER;
164 case 4:
165 return GL_FRAGMENT_SHADER;
166 }
167 UNREACHABLE_MSG("{}", stage_index);
168 return GL_NONE;
169}
170} // Anonymous namespace
31 171
32ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, 172ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
33 Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, 173 Tegra::Engines::Maxwell3D& maxwell3d_,
34 Tegra::Engines::KeplerCompute& kepler_compute_, 174 Tegra::Engines::KeplerCompute& kepler_compute_,
35 Tegra::MemoryManager& gpu_memory_, const Device& device_) 175 Tegra::MemoryManager& gpu_memory_, const Device& device_,
176 TextureCache& texture_cache_, BufferCache& buffer_cache_,
177 ProgramManager& program_manager_, StateTracker& state_tracker_)
36 : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, 178 : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
37 emu_window{emu_window_}, gpu{gpu_}, device{device_} {} 179 emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_},
180 buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{
181 state_tracker_} {}
38 182
39ShaderCache::~ShaderCache() = default; 183ShaderCache::~ShaderCache() = default;
40 184
185GraphicsProgram* ShaderCache::CurrentGraphicsProgram() {
186 if (!RefreshStages(graphics_key.unique_hashes)) {
187 return nullptr;
188 }
189 const auto& regs{maxwell3d.regs};
190 graphics_key.raw = 0;
191 graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
192 graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0
193 ? regs.draw.topology.Value()
194 : Maxwell::PrimitiveTopology{});
195 graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value());
196 graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value());
197 graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value());
198
199 const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
200 auto& program{pair->second};
201 if (is_new) {
202 program = CreateGraphicsProgram();
203 }
204 return program.get();
205}
206
207ComputeProgram* ShaderCache::CurrentComputeProgram() {
208 const VideoCommon::ShaderInfo* const shader{ComputeShader()};
209 if (!shader) {
210 return nullptr;
211 }
212 const auto& qmd{kepler_compute.launch_description};
213 const ComputeProgramKey key{
214 .unique_hash = shader->unique_hash,
215 .shared_memory_size = qmd.shared_alloc,
216 .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
217 };
218 const auto [pair, is_new]{compute_cache.try_emplace(key)};
219 auto& pipeline{pair->second};
220 if (!is_new) {
221 return pipeline.get();
222 }
223 pipeline = CreateComputeProgram(key, shader);
224 return pipeline.get();
225}
226
227std::unique_ptr<GraphicsProgram> ShaderCache::CreateGraphicsProgram() {
228 GraphicsEnvironments environments;
229 GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
230
231 main_pools.ReleaseContents();
232 return CreateGraphicsProgram(main_pools, graphics_key, environments.Span(), true);
233}
234
235std::unique_ptr<GraphicsProgram> ShaderCache::CreateGraphicsProgram(
236 ShaderPools& pools, const GraphicsProgramKey& key, std::span<Shader::Environment* const> envs,
237 bool build_in_parallel) {
238 LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
239 size_t env_index{0};
240 std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
241 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
242 if (key.unique_hashes[index] == 0) {
243 continue;
244 }
245 Shader::Environment& env{*envs[env_index]};
246 ++env_index;
247
248 const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
249 Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset);
250 programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg);
251 }
252 std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
253
254 OGLProgram gl_program;
255 gl_program.handle = glCreateProgram();
256
257 Shader::Backend::SPIRV::Bindings binding;
258 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
259 if (key.unique_hashes[index] == 0) {
260 continue;
261 }
262 UNIMPLEMENTED_IF(index == 0);
263
264 Shader::IR::Program& program{programs[index]};
265 const size_t stage_index{index - 1};
266 infos[stage_index] = &program.info;
267
268 const std::vector<u32> code{EmitSPIRV(profile, program, binding)};
269 FILE* file = fopen("D:\\shader.spv", "wb");
270 fwrite(code.data(), 4, code.size(), file);
271 fclose(file);
272 AddShader(Stage(stage_index), gl_program.handle, code);
273 }
274 LinkProgram(gl_program.handle);
275
276 return std::make_unique<GraphicsProgram>(texture_cache, buffer_cache, gpu_memory, maxwell3d,
277 program_manager, state_tracker, std::move(gl_program),
278 infos);
279}
280
281std::unique_ptr<ComputeProgram> ShaderCache::CreateComputeProgram(
282 const ComputeProgramKey& key, const VideoCommon::ShaderInfo* shader) {
283 const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
284 const auto& qmd{kepler_compute.launch_description};
285 ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
286 env.SetCachedSize(shader->size_bytes);
287
288 main_pools.ReleaseContents();
289 return CreateComputeProgram(main_pools, key, env, true);
290}
291
292std::unique_ptr<ComputeProgram> ShaderCache::CreateComputeProgram(ShaderPools& pools,
293 const ComputeProgramKey& key,
294 Shader::Environment& env,
295 bool build_in_parallel) {
296 LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
297
298 Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
299 Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)};
300 Shader::Backend::SPIRV::Bindings binding;
301 const std::vector<u32> code{EmitSPIRV(profile, program, binding)};
302 OGLProgram gl_program;
303 gl_program.handle = glCreateProgram();
304 AddShader(GL_COMPUTE_SHADER, gl_program.handle, code);
305 LinkProgram(gl_program.handle);
306 return std::make_unique<ComputeProgram>(texture_cache, buffer_cache, gpu_memory, kepler_compute,
307 program_manager, std::move(gl_program), program.info);
308}
309
41} // namespace OpenGL 310} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 96520e17c..b479d073a 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -5,20 +5,18 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <atomic>
9#include <bitset>
10#include <memory>
11#include <string>
12#include <tuple>
13#include <unordered_map> 8#include <unordered_map>
14#include <unordered_set>
15#include <vector>
16 9
17#include <glad/glad.h> 10#include <glad/glad.h>
18 11
19#include "common/common_types.h" 12#include "common/common_types.h"
13#include "shader_recompiler/frontend/ir/basic_block.h"
14#include "shader_recompiler/frontend/ir/value.h"
15#include "shader_recompiler/frontend/maxwell/control_flow.h"
16#include "shader_recompiler/object_pool.h"
20#include "video_core/engines/shader_type.h" 17#include "video_core/engines/shader_type.h"
21#include "video_core/renderer_opengl/gl_resource_manager.h" 18#include "video_core/renderer_opengl/gl_compute_program.h"
19#include "video_core/renderer_opengl/gl_graphics_program.h"
22#include "video_core/shader_cache.h" 20#include "video_core/shader_cache.h"
23 21
24namespace Tegra { 22namespace Tegra {
@@ -32,64 +30,62 @@ class EmuWindow;
32namespace OpenGL { 30namespace OpenGL {
33 31
34class Device; 32class Device;
33class ProgramManager;
35class RasterizerOpenGL; 34class RasterizerOpenGL;
36 35
37using Maxwell = Tegra::Engines::Maxwell3D::Regs; 36struct ShaderPools {
38 37 void ReleaseContents() {
39struct GraphicsProgramKey { 38 flow_block.ReleaseContents();
40 struct TransformFeedbackState { 39 block.ReleaseContents();
41 struct Layout { 40 inst.ReleaseContents();
42 u32 stream;
43 u32 varying_count;
44 u32 stride;
45 };
46 std::array<Layout, Maxwell::NumTransformFeedbackBuffers> layouts;
47 std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> varyings;
48 };
49
50 std::array<u64, 6> unique_hashes;
51 std::array<u8, Maxwell::NumRenderTargets> color_formats;
52 union {
53 u32 raw;
54 BitField<0, 1, u32> xfb_enabled;
55 BitField<1, 1, u32> early_z;
56 BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
57 BitField<6, 2, u32> tessellation_primitive;
58 BitField<8, 2, u32> tessellation_spacing;
59 BitField<10, 1, u32> tessellation_clockwise;
60 };
61 u32 padding;
62 TransformFeedbackState xfb_state;
63
64 [[nodiscard]] size_t Size() const noexcept {
65 if (xfb_enabled != 0) {
66 return sizeof(GraphicsProgramKey);
67 } else {
68 return offsetof(GraphicsProgramKey, padding);
69 }
70 } 41 }
71};
72static_assert(std::has_unique_object_representations_v<GraphicsProgramKey>);
73static_assert(std::is_trivially_copyable_v<GraphicsProgramKey>);
74static_assert(std::is_trivially_constructible_v<GraphicsProgramKey>);
75 42
76class GraphicsProgram { 43 Shader::ObjectPool<Shader::IR::Inst> inst;
77public: 44 Shader::ObjectPool<Shader::IR::Block> block;
78private: 45 Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
79}; 46};
80 47
81class ShaderCache : public VideoCommon::ShaderCache { 48class ShaderCache : public VideoCommon::ShaderCache {
82public: 49public:
83 explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, 50 explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
84 Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, 51 Tegra::Engines::Maxwell3D& maxwell3d_,
85 Tegra::Engines::KeplerCompute& kepler_compute_, 52 Tegra::Engines::KeplerCompute& kepler_compute_,
86 Tegra::MemoryManager& gpu_memory_, const Device& device_); 53 Tegra::MemoryManager& gpu_memory_, const Device& device_,
54 TextureCache& texture_cache_, BufferCache& buffer_cache_,
55 ProgramManager& program_manager_, StateTracker& state_tracker_);
87 ~ShaderCache(); 56 ~ShaderCache();
88 57
58 [[nodiscard]] GraphicsProgram* CurrentGraphicsProgram();
59
60 [[nodiscard]] ComputeProgram* CurrentComputeProgram();
61
89private: 62private:
63 std::unique_ptr<GraphicsProgram> CreateGraphicsProgram();
64
65 std::unique_ptr<GraphicsProgram> CreateGraphicsProgram(
66 ShaderPools& pools, const GraphicsProgramKey& key,
67 std::span<Shader::Environment* const> envs, bool build_in_parallel);
68
69 std::unique_ptr<ComputeProgram> CreateComputeProgram(const ComputeProgramKey& key,
70 const VideoCommon::ShaderInfo* shader);
71
72 std::unique_ptr<ComputeProgram> CreateComputeProgram(ShaderPools& pools,
73 const ComputeProgramKey& key,
74 Shader::Environment& env,
75 bool build_in_parallel);
76
90 Core::Frontend::EmuWindow& emu_window; 77 Core::Frontend::EmuWindow& emu_window;
91 Tegra::GPU& gpu;
92 const Device& device; 78 const Device& device;
79 TextureCache& texture_cache;
80 BufferCache& buffer_cache;
81 ProgramManager& program_manager;
82 StateTracker& state_tracker;
83
84 GraphicsProgramKey graphics_key{};
85
86 ShaderPools main_pools;
87 std::unordered_map<GraphicsProgramKey, std::unique_ptr<GraphicsProgram>> graphics_cache;
88 std::unordered_map<ComputeProgramKey, std::unique_ptr<ComputeProgram>> compute_cache;
93}; 89};
94 90
95} // namespace OpenGL 91} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 553e6e8d6..399959afb 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -1,149 +1,3 @@
1// Copyright 2018 yuzu Emulator Project 1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4
5#include <glad/glad.h>
6
7#include "common/common_types.h"
8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_opengl/gl_device.h"
10#include "video_core/renderer_opengl/gl_shader_manager.h"
11
12namespace OpenGL {
13
14namespace {
15
16void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) {
17 if (current == old) {
18 return;
19 }
20 if (current == 0) {
21 if (enabled) {
22 enabled = false;
23 glDisable(stage);
24 }
25 return;
26 }
27 if (!enabled) {
28 enabled = true;
29 glEnable(stage);
30 }
31 glBindProgramARB(stage, current);
32}
33
34} // Anonymous namespace
35
36ProgramManager::ProgramManager(const Device& device)
37 : use_assembly_programs{device.UseAssemblyShaders()} {
38 if (use_assembly_programs) {
39 glEnable(GL_COMPUTE_PROGRAM_NV);
40 } else {
41 graphics_pipeline.Create();
42 glBindProgramPipeline(graphics_pipeline.handle);
43 }
44}
45
46ProgramManager::~ProgramManager() = default;
47
48void ProgramManager::BindCompute(GLuint program) {
49 if (use_assembly_programs) {
50 glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
51 } else {
52 is_graphics_bound = false;
53 glUseProgram(program);
54 }
55}
56
57void ProgramManager::BindGraphicsPipeline() {
58 if (!use_assembly_programs) {
59 UpdateSourcePrograms();
60 }
61}
62
63void ProgramManager::BindHostPipeline(GLuint pipeline) {
64 if (use_assembly_programs) {
65 if (geometry_enabled) {
66 geometry_enabled = false;
67 old_state.geometry = 0;
68 glDisable(GL_GEOMETRY_PROGRAM_NV);
69 }
70 } else {
71 if (!is_graphics_bound) {
72 glUseProgram(0);
73 }
74 }
75 glBindProgramPipeline(pipeline);
76}
77
78void ProgramManager::RestoreGuestPipeline() {
79 if (use_assembly_programs) {
80 glBindProgramPipeline(0);
81 } else {
82 glBindProgramPipeline(graphics_pipeline.handle);
83 }
84}
85
86void ProgramManager::BindHostCompute(GLuint program) {
87 if (use_assembly_programs) {
88 glDisable(GL_COMPUTE_PROGRAM_NV);
89 }
90 glUseProgram(program);
91 is_graphics_bound = false;
92}
93
94void ProgramManager::RestoreGuestCompute() {
95 if (use_assembly_programs) {
96 glEnable(GL_COMPUTE_PROGRAM_NV);
97 glUseProgram(0);
98 }
99}
100
101void ProgramManager::UseVertexShader(GLuint program) {
102 if (use_assembly_programs) {
103 BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
104 }
105 current_state.vertex = program;
106}
107
108void ProgramManager::UseGeometryShader(GLuint program) {
109 if (use_assembly_programs) {
110 BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled);
111 }
112 current_state.geometry = program;
113}
114
115void ProgramManager::UseFragmentShader(GLuint program) {
116 if (use_assembly_programs) {
117 BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled);
118 }
119 current_state.fragment = program;
120}
121
122void ProgramManager::UpdateSourcePrograms() {
123 if (!is_graphics_bound) {
124 is_graphics_bound = true;
125 glUseProgram(0);
126 }
127
128 const GLuint handle = graphics_pipeline.handle;
129 const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) {
130 if (current == old) {
131 return;
132 }
133 glUseProgramStages(handle, stage, current);
134 };
135 update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex);
136 update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry);
137 update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment);
138
139 old_state = current_state;
140}
141
142void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
143 const auto& regs = maxwell.regs;
144
145 // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
146 y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
147}
148
149} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index ad42cce74..70781d6f5 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -4,79 +4,24 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <cstddef>
8
9#include <glad/glad.h> 7#include <glad/glad.h>
10 8
11#include "video_core/renderer_opengl/gl_resource_manager.h"
12#include "video_core/renderer_opengl/maxwell_to_gl.h"
13
14namespace OpenGL { 9namespace OpenGL {
15 10
16class Device;
17
18/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
19/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
20/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
21/// Not following that rule will cause problems on some AMD drivers.
22struct alignas(16) MaxwellUniformData {
23 void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell);
24
25 GLfloat y_direction;
26};
27static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
28static_assert(sizeof(MaxwellUniformData) < 16384,
29 "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
30
31class ProgramManager { 11class ProgramManager {
32public: 12public:
33 explicit ProgramManager(const Device& device); 13 void BindProgram(GLuint program) {
34 ~ProgramManager(); 14 if (bound_program == program) {
35 15 return;
36 /// Binds a compute program 16 }
37 void BindCompute(GLuint program); 17 bound_program = program;
38 18 glUseProgram(program);
39 /// Updates bound programs. 19 }
40 void BindGraphicsPipeline();
41
42 /// Binds an OpenGL pipeline object unsynchronized with the guest state.
43 void BindHostPipeline(GLuint pipeline);
44 20
45 /// Rewinds BindHostPipeline state changes. 21 void RestoreGuestCompute() {}
46 void RestoreGuestPipeline();
47
48 /// Binds an OpenGL GLSL program object unsynchronized with the guest state.
49 void BindHostCompute(GLuint program);
50
51 /// Rewinds BindHostCompute state changes.
52 void RestoreGuestCompute();
53
54 void UseVertexShader(GLuint program);
55 void UseGeometryShader(GLuint program);
56 void UseFragmentShader(GLuint program);
57 22
58private: 23private:
59 struct PipelineState { 24 GLuint bound_program = 0;
60 GLuint vertex = 0;
61 GLuint geometry = 0;
62 GLuint fragment = 0;
63 };
64
65 /// Update GLSL programs.
66 void UpdateSourcePrograms();
67
68 OGLPipeline graphics_pipeline;
69
70 PipelineState current_state;
71 PipelineState old_state;
72
73 bool use_assembly_programs = false;
74
75 bool is_graphics_bound = true;
76
77 bool vertex_enabled = false;
78 bool geometry_enabled = false;
79 bool fragment_enabled = false;
80}; 25};
81 26
82} // namespace OpenGL 27} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index a8bf84218..7053be161 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -24,9 +24,7 @@
24#include "video_core/textures/decoders.h" 24#include "video_core/textures/decoders.h"
25 25
26namespace OpenGL { 26namespace OpenGL {
27
28namespace { 27namespace {
29
30using Tegra::Texture::SwizzleSource; 28using Tegra::Texture::SwizzleSource;
31using Tegra::Texture::TextureMipmapFilter; 29using Tegra::Texture::TextureMipmapFilter;
32using Tegra::Texture::TextureType; 30using Tegra::Texture::TextureType;
@@ -59,107 +57,6 @@ struct CopyRegion {
59 GLsizei depth; 57 GLsizei depth;
60}; 58};
61 59
62struct FormatTuple {
63 GLenum internal_format;
64 GLenum format = GL_NONE;
65 GLenum type = GL_NONE;
66};
67
68constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
69 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
70 {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
71 {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
72 {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
73 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
74 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
75 {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
76 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
77 {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
78 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
79 {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
80 {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
81 {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
82 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
83 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
84 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
85 {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
86 {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
87 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
88 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
89 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
90 {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
91 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
92 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
93 {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
94 {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
95 {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
96 {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
97 {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
98 {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
99 {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
100 {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
101 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
102 {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
103 {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
104 {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
105 {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
106 {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
107 {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
108 {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
109 {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
110 {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
111 {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
112 {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
113 {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
114 {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
115 {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
116 {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
117 {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
118 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
119 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
120 {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
121 {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
122 {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
123 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
124 {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
125 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
126 {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
127 {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
128 {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
129 {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
130 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
131 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
132 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
133 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
134 {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
135 {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
136 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
137 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
138 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
139 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
140 {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
141 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
142 {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
143 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
144 {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
145 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
146 {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
147 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
148 {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
149 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
150 {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
151 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
152 {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
153 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
154 {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
155 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
156 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
157 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
158 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
159 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
160 GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
161}};
162
163constexpr std::array ACCELERATED_FORMATS{ 60constexpr std::array ACCELERATED_FORMATS{
164 GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, 61 GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F,
165 GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, 62 GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI,
@@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{
170 GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, 67 GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM,
171}; 68};
172 69
173const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
174 ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
175 return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
176}
177
178GLenum ImageTarget(const VideoCommon::ImageInfo& info) { 70GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
179 switch (info.type) { 71 switch (info.type) {
180 case ImageType::e1D: 72 case ImageType::e1D:
@@ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
195 return GL_NONE; 87 return GL_NONE;
196} 88}
197 89
198GLenum ImageTarget(ImageViewType type, int num_samples = 1) { 90GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) {
199 const bool is_multisampled = num_samples > 1; 91 const bool is_multisampled = num_samples > 1;
200 switch (type) { 92 switch (type) {
201 case ImageViewType::e1D: 93 case Shader::TextureType::Color1D:
202 return GL_TEXTURE_1D; 94 return GL_TEXTURE_1D;
203 case ImageViewType::e2D: 95 case Shader::TextureType::Color2D:
204 return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; 96 return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
205 case ImageViewType::Cube: 97 case Shader::TextureType::ColorCube:
206 return GL_TEXTURE_CUBE_MAP; 98 return GL_TEXTURE_CUBE_MAP;
207 case ImageViewType::e3D: 99 case Shader::TextureType::Color3D:
208 return GL_TEXTURE_3D; 100 return GL_TEXTURE_3D;
209 case ImageViewType::e1DArray: 101 case Shader::TextureType::ColorArray1D:
210 return GL_TEXTURE_1D_ARRAY; 102 return GL_TEXTURE_1D_ARRAY;
211 case ImageViewType::e2DArray: 103 case Shader::TextureType::ColorArray2D:
212 return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; 104 return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY;
213 case ImageViewType::CubeArray: 105 case Shader::TextureType::ColorArrayCube:
214 return GL_TEXTURE_CUBE_MAP_ARRAY; 106 return GL_TEXTURE_CUBE_MAP_ARRAY;
215 case ImageViewType::Rect: 107 case Shader::TextureType::Buffer:
216 return GL_TEXTURE_RECTANGLE;
217 case ImageViewType::Buffer:
218 return GL_TEXTURE_BUFFER; 108 return GL_TEXTURE_BUFFER;
219 } 109 }
220 UNREACHABLE_MSG("Invalid image view type={}", type); 110 UNREACHABLE_MSG("Invalid image view type={}", type);
@@ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
322 default: 212 default:
323 return false; 213 return false;
324 } 214 }
325 const GLenum internal_format = GetFormatTuple(info.format).internal_format; 215 const GLenum internal_format = MaxwellToGL::GetFormatTuple(info.format).internal_format;
326 const auto& format_info = runtime.FormatInfo(info.type, internal_format); 216 const auto& format_info = runtime.FormatInfo(info.type, internal_format);
327 if (format_info.is_compressed) { 217 if (format_info.is_compressed) {
328 return false; 218 return false;
@@ -414,11 +304,10 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
414 304
415void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { 305void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
416 if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { 306 if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
417 const GLuint texture = image_view->DefaultHandle(); 307 glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0);
418 glNamedFramebufferTexture(fbo, attachment, texture, 0);
419 return; 308 return;
420 } 309 }
421 const GLuint texture = image_view->Handle(ImageViewType::e3D); 310 const GLuint texture = image_view->Handle(Shader::TextureType::Color3D);
422 if (image_view->range.extent.layers > 1) { 311 if (image_view->range.extent.layers > 1) {
423 // TODO: OpenGL doesn't support rendering to a fixed number of slices 312 // TODO: OpenGL doesn't support rendering to a fixed number of slices
424 glNamedFramebufferTexture(fbo, attachment, texture, 0); 313 glNamedFramebufferTexture(fbo, attachment, texture, 0);
@@ -453,7 +342,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
453 static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; 342 static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
454 for (size_t i = 0; i < TARGETS.size(); ++i) { 343 for (size_t i = 0; i < TARGETS.size(); ++i) {
455 const GLenum target = TARGETS[i]; 344 const GLenum target = TARGETS[i];
456 for (const FormatTuple& tuple : FORMAT_TABLE) { 345 for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) {
457 const GLenum format = tuple.internal_format; 346 const GLenum format = tuple.internal_format;
458 GLint compat_class; 347 GLint compat_class;
459 GLint compat_type; 348 GLint compat_type;
@@ -475,11 +364,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
475 null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); 364 null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY);
476 null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); 365 null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY);
477 null_image_3d.Create(GL_TEXTURE_3D); 366 null_image_3d.Create(GL_TEXTURE_3D);
478 null_image_rect.Create(GL_TEXTURE_RECTANGLE);
479 glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); 367 glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1);
480 glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); 368 glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6);
481 glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); 369 glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1);
482 glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1);
483 370
484 std::array<GLuint, 4> new_handles; 371 std::array<GLuint, 4> new_handles;
485 glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data()); 372 glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data());
@@ -496,29 +383,28 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
496 glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, 383 glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle,
497 GL_R8, 0, 1, 0, 6); 384 GL_R8, 0, 1, 0, 6);
498 const std::array texture_handles{ 385 const std::array texture_handles{
499 null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, 386 null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
500 null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle, 387 null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle,
501 null_image_view_2d_array.handle, null_image_view_cube.handle, 388 null_image_view_cube.handle,
502 }; 389 };
503 for (const GLuint handle : texture_handles) { 390 for (const GLuint handle : texture_handles) {
504 static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; 391 static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO};
505 glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); 392 glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data());
506 } 393 }
507 const auto set_view = [this](ImageViewType type, GLuint handle) { 394 const auto set_view = [this](Shader::TextureType type, GLuint handle) {
508 if (device.HasDebuggingToolAttached()) { 395 if (device.HasDebuggingToolAttached()) {
509 const std::string name = fmt::format("NullImage {}", type); 396 const std::string name = fmt::format("NullImage {}", type);
510 glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); 397 glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
511 } 398 }
512 null_image_views[static_cast<size_t>(type)] = handle; 399 null_image_views[static_cast<size_t>(type)] = handle;
513 }; 400 };
514 set_view(ImageViewType::e1D, null_image_view_1d.handle); 401 set_view(Shader::TextureType::Color1D, null_image_view_1d.handle);
515 set_view(ImageViewType::e2D, null_image_view_2d.handle); 402 set_view(Shader::TextureType::Color2D, null_image_view_2d.handle);
516 set_view(ImageViewType::Cube, null_image_view_cube.handle); 403 set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle);
517 set_view(ImageViewType::e3D, null_image_3d.handle); 404 set_view(Shader::TextureType::Color3D, null_image_3d.handle);
518 set_view(ImageViewType::e1DArray, null_image_1d_array.handle); 405 set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle);
519 set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); 406 set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle);
520 set_view(ImageViewType::CubeArray, null_image_cube_array.handle); 407 set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle);
521 set_view(ImageViewType::Rect, null_image_rect.handle);
522} 408}
523 409
524TextureCacheRuntime::~TextureCacheRuntime() = default; 410TextureCacheRuntime::~TextureCacheRuntime() = default;
@@ -710,7 +596,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
710 gl_format = GL_RGBA; 596 gl_format = GL_RGBA;
711 gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; 597 gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
712 } else { 598 } else {
713 const auto& tuple = GetFormatTuple(info.format); 599 const auto& tuple = MaxwellToGL::GetFormatTuple(info.format);
714 gl_internal_format = tuple.internal_format; 600 gl_internal_format = tuple.internal_format;
715 gl_format = tuple.format; 601 gl_format = tuple.format;
716 gl_type = tuple.type; 602 gl_type = tuple.type;
@@ -750,8 +636,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
750 glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); 636 glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
751 break; 637 break;
752 case GL_TEXTURE_BUFFER: 638 case GL_TEXTURE_BUFFER:
753 buffer.Create(); 639 UNREACHABLE();
754 glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0);
755 break; 640 break;
756 default: 641 default:
757 UNREACHABLE_MSG("Invalid target=0x{:x}", target); 642 UNREACHABLE_MSG("Invalid target=0x{:x}", target);
@@ -789,14 +674,6 @@ void Image::UploadMemory(const ImageBufferMap& map,
789 } 674 }
790} 675}
791 676
792void Image::UploadMemory(const ImageBufferMap& map,
793 std::span<const VideoCommon::BufferCopy> copies) {
794 for (const VideoCommon::BufferCopy& copy : copies) {
795 glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
796 copy.dst_offset, copy.size);
797 }
798}
799
800void Image::DownloadMemory(ImageBufferMap& map, 677void Image::DownloadMemory(ImageBufferMap& map,
801 std::span<const VideoCommon::BufferImageCopy> copies) { 678 std::span<const VideoCommon::BufferImageCopy> copies) {
802 glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API 679 glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
@@ -958,7 +835,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
958 if (True(image.flags & ImageFlagBits::Converted)) { 835 if (True(image.flags & ImageFlagBits::Converted)) {
959 internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; 836 internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
960 } else { 837 } else {
961 internal_format = GetFormatTuple(format).internal_format; 838 internal_format = MaxwellToGL::GetFormatTuple(format).internal_format;
962 } 839 }
963 VideoCommon::SubresourceRange flatten_range = info.range; 840 VideoCommon::SubresourceRange flatten_range = info.range;
964 std::array<GLuint, 2> handles; 841 std::array<GLuint, 2> handles;
@@ -970,8 +847,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
970 [[fallthrough]]; 847 [[fallthrough]];
971 case ImageViewType::e1D: 848 case ImageViewType::e1D:
972 glGenTextures(2, handles.data()); 849 glGenTextures(2, handles.data());
973 SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); 850 SetupView(device, image, Shader::TextureType::Color1D, handles[0], info, flatten_range);
974 SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range); 851 SetupView(device, image, Shader::TextureType::ColorArray1D, handles[1], info, info.range);
975 break; 852 break;
976 case ImageViewType::e2DArray: 853 case ImageViewType::e2DArray:
977 flatten_range.extent.layers = 1; 854 flatten_range.extent.layers = 1;
@@ -985,62 +862,84 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
985 .extent = {.levels = 1, .layers = 1}, 862 .extent = {.levels = 1, .layers = 1},
986 }; 863 };
987 glGenTextures(1, handles.data()); 864 glGenTextures(1, handles.data());
988 SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); 865 SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, slice_range);
989 break; 866 } else {
867 glGenTextures(2, handles.data());
868 SetupView(device, image, Shader::TextureType::Color2D, handles[0], info, flatten_range);
869 SetupView(device, image, Shader::TextureType::ColorArray2D, handles[1], info,
870 info.range);
990 } 871 }
991 glGenTextures(2, handles.data());
992 SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range);
993 SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range);
994 break; 872 break;
995 case ImageViewType::e3D: 873 case ImageViewType::e3D:
996 glGenTextures(1, handles.data()); 874 glGenTextures(1, handles.data());
997 SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range); 875 SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, info.range);
998 break; 876 break;
999 case ImageViewType::CubeArray: 877 case ImageViewType::CubeArray:
1000 flatten_range.extent.layers = 6; 878 flatten_range.extent.layers = 6;
1001 [[fallthrough]]; 879 [[fallthrough]];
1002 case ImageViewType::Cube: 880 case ImageViewType::Cube:
1003 glGenTextures(2, handles.data()); 881 glGenTextures(2, handles.data());
1004 SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); 882 SetupView(device, image, Shader::TextureType::ColorCube, handles[0], info, flatten_range);
1005 SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range); 883 SetupView(device, image, Shader::TextureType::ColorArrayCube, handles[1], info, info.range);
1006 break; 884 break;
1007 case ImageViewType::Rect: 885 case ImageViewType::Rect:
1008 glGenTextures(1, handles.data()); 886 UNIMPLEMENTED();
1009 SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range);
1010 break; 887 break;
1011 case ImageViewType::Buffer: 888 case ImageViewType::Buffer:
1012 glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); 889 UNREACHABLE();
1013 SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); 890 break;
891 }
892 switch (info.type) {
893 case ImageViewType::e1D:
894 default_handle = Handle(Shader::TextureType::Color1D);
895 break;
896 case ImageViewType::e1DArray:
897 default_handle = Handle(Shader::TextureType::ColorArray1D);
898 break;
899 case ImageViewType::e2D:
900 default_handle = Handle(Shader::TextureType::Color2D);
901 break;
902 case ImageViewType::e2DArray:
903 default_handle = Handle(Shader::TextureType::ColorArray2D);
904 break;
905 case ImageViewType::e3D:
906 default_handle = Handle(Shader::TextureType::Color3D);
907 break;
908 case ImageViewType::Cube:
909 default_handle = Handle(Shader::TextureType::ColorCube);
910 break;
911 case ImageViewType::CubeArray:
912 default_handle = Handle(Shader::TextureType::ColorArrayCube);
913 break;
914 default:
1014 break; 915 break;
1015 } 916 }
1016 default_handle = Handle(info.type);
1017} 917}
1018 918
1019ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, 919ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
920 const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
921 : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
922 buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
923
924ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
1020 const VideoCommon::ImageViewInfo& view_info) 925 const VideoCommon::ImageViewInfo& view_info)
1021 : VideoCommon::ImageViewBase{info, view_info} {} 926 : VideoCommon::ImageViewBase{info, view_info} {}
1022 927
1023ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) 928ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
1024 : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} 929 : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
1025 930
1026void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, 931void ImageView::SetupView(const Device& device, Image& image, Shader::TextureType view_type,
1027 GLuint handle, const VideoCommon::ImageViewInfo& info, 932 GLuint handle, const VideoCommon::ImageViewInfo& info,
1028 VideoCommon::SubresourceRange view_range) { 933 VideoCommon::SubresourceRange view_range) {
1029 if (info.type == ImageViewType::Buffer) { 934 const GLuint parent = image.texture.handle;
1030 // TODO: Take offset from buffer cache 935 const GLenum target = ImageTarget(view_type, image.info.num_samples);
1031 glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, 936 glTextureView(handle, target, parent, internal_format, view_range.base.level,
1032 image.guest_size_bytes); 937 view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
1033 } else { 938 if (!info.IsRenderTarget()) {
1034 const GLuint parent = image.texture.handle; 939 ApplySwizzle(handle, format, info.Swizzle());
1035 const GLenum target = ImageTarget(view_type, image.info.num_samples);
1036 glTextureView(handle, target, parent, internal_format, view_range.base.level,
1037 view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
1038 if (!info.IsRenderTarget()) {
1039 ApplySwizzle(handle, format, info.Swizzle());
1040 }
1041 } 940 }
1042 if (device.HasDebuggingToolAttached()) { 941 if (device.HasDebuggingToolAttached()) {
1043 const std::string name = VideoCommon::Name(*this, view_type); 942 const std::string name = VideoCommon::Name(*this);
1044 glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); 943 glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
1045 } 944 }
1046 stored_views.emplace_back().handle = handle; 945 stored_views.emplace_back().handle = handle;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 817b0e650..2e3e02b79 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -9,6 +9,7 @@
9 9
10#include <glad/glad.h> 10#include <glad/glad.h>
11 11
12#include "shader_recompiler/shader_info.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 13#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/util_shaders.h" 14#include "video_core/renderer_opengl/util_shaders.h"
14#include "video_core/texture_cache/texture_cache.h" 15#include "video_core/texture_cache/texture_cache.h"
@@ -127,13 +128,12 @@ private:
127 OGLTexture null_image_1d_array; 128 OGLTexture null_image_1d_array;
128 OGLTexture null_image_cube_array; 129 OGLTexture null_image_cube_array;
129 OGLTexture null_image_3d; 130 OGLTexture null_image_3d;
130 OGLTexture null_image_rect;
131 OGLTextureView null_image_view_1d; 131 OGLTextureView null_image_view_1d;
132 OGLTextureView null_image_view_2d; 132 OGLTextureView null_image_view_2d;
133 OGLTextureView null_image_view_2d_array; 133 OGLTextureView null_image_view_2d_array;
134 OGLTextureView null_image_view_cube; 134 OGLTextureView null_image_view_cube;
135 135
136 std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views; 136 std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
137}; 137};
138 138
139class Image : public VideoCommon::ImageBase { 139class Image : public VideoCommon::ImageBase {
@@ -154,8 +154,6 @@ public:
154 void UploadMemory(const ImageBufferMap& map, 154 void UploadMemory(const ImageBufferMap& map,
155 std::span<const VideoCommon::BufferImageCopy> copies); 155 std::span<const VideoCommon::BufferImageCopy> copies);
156 156
157 void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);
158
159 void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); 157 void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
160 158
161 GLuint StorageHandle() noexcept; 159 GLuint StorageHandle() noexcept;
@@ -170,7 +168,6 @@ private:
170 void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); 168 void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
171 169
172 OGLTexture texture; 170 OGLTexture texture;
173 OGLBuffer buffer;
174 OGLTextureView store_view; 171 OGLTextureView store_view;
175 GLenum gl_internal_format = GL_NONE; 172 GLenum gl_internal_format = GL_NONE;
176 GLenum gl_format = GL_NONE; 173 GLenum gl_format = GL_NONE;
@@ -182,12 +179,14 @@ class ImageView : public VideoCommon::ImageViewBase {
182 179
183public: 180public:
184 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); 181 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
182 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
183 const VideoCommon::ImageViewInfo&, GPUVAddr);
185 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, 184 explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
186 const VideoCommon::ImageViewInfo& view_info); 185 const VideoCommon::ImageViewInfo& view_info);
187 explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); 186 explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
188 187
189 [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { 188 [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept {
190 return views[static_cast<size_t>(query_type)]; 189 return views[static_cast<size_t>(handle_type)];
191 } 190 }
192 191
193 [[nodiscard]] GLuint DefaultHandle() const noexcept { 192 [[nodiscard]] GLuint DefaultHandle() const noexcept {
@@ -198,15 +197,25 @@ public:
198 return internal_format; 197 return internal_format;
199 } 198 }
200 199
200 [[nodiscard]] GPUVAddr GpuAddr() const noexcept {
201 return gpu_addr;
202 }
203
204 [[nodiscard]] u32 BufferSize() const noexcept {
205 return buffer_size;
206 }
207
201private: 208private:
202 void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, 209 void SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle,
203 const VideoCommon::ImageViewInfo& info, 210 const VideoCommon::ImageViewInfo& info,
204 VideoCommon::SubresourceRange view_range); 211 VideoCommon::SubresourceRange view_range);
205 212
206 std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{}; 213 std::array<GLuint, Shader::NUM_TEXTURE_TYPES> views{};
207 std::vector<OGLTextureView> stored_views; 214 std::vector<OGLTextureView> stored_views;
208 GLuint default_handle = 0;
209 GLenum internal_format = GL_NONE; 215 GLenum internal_format = GL_NONE;
216 GLuint default_handle = 0;
217 GPUVAddr gpu_addr = 0;
218 u32 buffer_size = 0;
210}; 219};
211 220
212class ImageAlloc : public VideoCommon::ImageAllocBase {}; 221class ImageAlloc : public VideoCommon::ImageAllocBase {};
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index f7ad8f370..672f94bfc 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -5,12 +5,120 @@
5#pragma once 5#pragma once
6 6
7#include <glad/glad.h> 7#include <glad/glad.h>
8
8#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/surface.h"
9 11
10namespace OpenGL::MaxwellToGL { 12namespace OpenGL::MaxwellToGL {
11 13
12using Maxwell = Tegra::Engines::Maxwell3D::Regs; 14using Maxwell = Tegra::Engines::Maxwell3D::Regs;
13 15
16struct FormatTuple {
17 GLenum internal_format;
18 GLenum format = GL_NONE;
19 GLenum type = GL_NONE;
20};
21
22constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TABLE = {{
23 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
24 {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
25 {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
26 {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
27 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
28 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
29 {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
30 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
31 {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
32 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
33 {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
34 {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
35 {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
36 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
37 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
38 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
39 {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
40 {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
41 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
42 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
43 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
44 {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
45 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
46 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
47 {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
48 {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
49 {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
50 {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
51 {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
52 {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
53 {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
54 {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
55 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
56 {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
57 {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
58 {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
59 {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
60 {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
61 {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
62 {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
63 {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
64 {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
65 {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
66 {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
67 {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
68 {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
69 {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
70 {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
71 {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
72 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
73 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
74 {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
75 {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
76 {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
77 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
78 {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
79 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
80 {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
81 {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
82 {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
83 {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
84 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
85 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
86 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
87 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
88 {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
89 {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
90 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
91 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
92 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
93 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
94 {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
95 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
96 {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
97 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
98 {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
99 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
100 {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
101 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
102 {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
103 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
104 {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
105 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
106 {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
107 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
108 {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
109 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
110 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
111 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
112 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
113 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
114 GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
115}};
116
117inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) {
118 ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
119 return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
120}
121
14inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { 122inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
15 switch (attrib.type) { 123 switch (attrib.type) {
16 case Maxwell::VertexAttribute::Type::UnsignedNorm: 124 case Maxwell::VertexAttribute::Type::UnsignedNorm:
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index c12929de6..4e77ef808 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -130,7 +130,6 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
130 std::unique_ptr<Core::Frontend::GraphicsContext> context_) 130 std::unique_ptr<Core::Frontend::GraphicsContext> context_)
131 : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, 131 : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
132 emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu}, 132 emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu},
133 program_manager{device},
134 rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { 133 rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) {
135 if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { 134 if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
136 glEnable(GL_DEBUG_OUTPUT); 135 glEnable(GL_DEBUG_OUTPUT);
@@ -236,12 +235,7 @@ void RendererOpenGL::InitOpenGLObjects() {
236 OGLShader fragment_shader; 235 OGLShader fragment_shader;
237 fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); 236 fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
238 237
239 vertex_program.Create(true, false, vertex_shader.handle); 238 present_program.Create(false, false, vertex_shader.handle, fragment_shader.handle);
240 fragment_program.Create(true, false, fragment_shader.handle);
241
242 pipeline.Create();
243 glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
244 glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
245 239
246 // Generate presentation sampler 240 // Generate presentation sampler
247 present_sampler.Create(); 241 present_sampler.Create();
@@ -342,8 +336,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
342 // Set projection matrix 336 // Set projection matrix
343 const std::array ortho_matrix = 337 const std::array ortho_matrix =
344 MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); 338 MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
345 glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE, 339 program_manager.BindProgram(present_program.handle);
346 std::data(ortho_matrix)); 340 glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data());
347 341
348 const auto& texcoords = screen_info.display_texcoords; 342 const auto& texcoords = screen_info.display_texcoords;
349 auto left = texcoords.left; 343 auto left = texcoords.left;
@@ -404,8 +398,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
404 state_tracker.NotifyClipControl(); 398 state_tracker.NotifyClipControl();
405 state_tracker.NotifyAlphaTest(); 399 state_tracker.NotifyAlphaTest();
406 400
407 program_manager.BindHostPipeline(pipeline.handle);
408
409 state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); 401 state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
410 glEnable(GL_CULL_FACE); 402 glEnable(GL_CULL_FACE);
411 if (screen_info.display_srgb) { 403 if (screen_info.display_srgb) {
@@ -453,7 +445,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
453 glClear(GL_COLOR_BUFFER_BIT); 445 glClear(GL_COLOR_BUFFER_BIT);
454 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); 446 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
455 447
456 program_manager.RestoreGuestPipeline(); 448 // TODO
449 // program_manager.RestoreGuestPipeline();
457} 450}
458 451
459void RendererOpenGL::RenderScreenshot() { 452void RendererOpenGL::RenderScreenshot() {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 0b66f8332..b3ee55665 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -12,7 +12,6 @@
12#include "video_core/renderer_opengl/gl_device.h" 12#include "video_core/renderer_opengl/gl_device.h"
13#include "video_core/renderer_opengl/gl_rasterizer.h" 13#include "video_core/renderer_opengl/gl_rasterizer.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h" 14#include "video_core/renderer_opengl/gl_resource_manager.h"
15#include "video_core/renderer_opengl/gl_shader_manager.h"
16#include "video_core/renderer_opengl/gl_state_tracker.h" 15#include "video_core/renderer_opengl/gl_state_tracker.h"
17 16
18namespace Core { 17namespace Core {
@@ -111,9 +110,7 @@ private:
111 // OpenGL object IDs 110 // OpenGL object IDs
112 OGLSampler present_sampler; 111 OGLSampler present_sampler;
113 OGLBuffer vertex_buffer; 112 OGLBuffer vertex_buffer;
114 OGLProgram vertex_program; 113 OGLProgram present_program;
115 OGLProgram fragment_program;
116 OGLPipeline pipeline;
117 OGLFramebuffer screenshot_framebuffer; 114 OGLFramebuffer screenshot_framebuffer;
118 115
119 // GPU address of the vertex buffer 116 // GPU address of the vertex buffer
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index 8fb5be393..51e72b705 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -16,7 +16,6 @@
16#include "video_core/host_shaders/opengl_copy_bc4_comp.h" 16#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
17#include "video_core/host_shaders/opengl_copy_bgra_comp.h" 17#include "video_core/host_shaders/opengl_copy_bgra_comp.h"
18#include "video_core/host_shaders/pitch_unswizzle_comp.h" 18#include "video_core/host_shaders/pitch_unswizzle_comp.h"
19#include "video_core/renderer_opengl/gl_resource_manager.h"
20#include "video_core/renderer_opengl/gl_shader_manager.h" 19#include "video_core/renderer_opengl/gl_shader_manager.h"
21#include "video_core/renderer_opengl/gl_texture_cache.h" 20#include "video_core/renderer_opengl/gl_texture_cache.h"
22#include "video_core/renderer_opengl/util_shaders.h" 21#include "video_core/renderer_opengl/util_shaders.h"
@@ -86,7 +85,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
86 .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), 85 .width = VideoCore::Surface::DefaultBlockWidth(image.info.format),
87 .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), 86 .height = VideoCore::Surface::DefaultBlockHeight(image.info.format),
88 }; 87 };
89 program_manager.BindHostCompute(astc_decoder_program.handle); 88 program_manager.BindProgram(astc_decoder_program.handle);
90 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); 89 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
91 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); 90 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
92 91
@@ -134,7 +133,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
134 static constexpr GLuint BINDING_INPUT_BUFFER = 1; 133 static constexpr GLuint BINDING_INPUT_BUFFER = 1;
135 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; 134 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
136 135
137 program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); 136 program_manager.BindProgram(block_linear_unswizzle_2d_program.handle);
138 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); 137 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
139 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); 138 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
140 139
@@ -173,7 +172,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
173 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; 172 static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
174 173
175 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); 174 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
176 program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); 175 program_manager.BindProgram(block_linear_unswizzle_3d_program.handle);
177 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); 176 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
178 177
179 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); 178 const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
@@ -222,7 +221,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
222 UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), 221 UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
223 "Non-power of two images are not implemented"); 222 "Non-power of two images are not implemented");
224 223
225 program_manager.BindHostCompute(pitch_unswizzle_program.handle); 224 program_manager.BindProgram(pitch_unswizzle_program.handle);
226 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); 225 glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
227 glUniform2ui(LOC_ORIGIN, 0, 0); 226 glUniform2ui(LOC_ORIGIN, 0, 0);
228 glUniform2i(LOC_DESTINATION, 0, 0); 227 glUniform2i(LOC_DESTINATION, 0, 0);
@@ -250,7 +249,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
250 static constexpr GLuint LOC_SRC_OFFSET = 0; 249 static constexpr GLuint LOC_SRC_OFFSET = 0;
251 static constexpr GLuint LOC_DST_OFFSET = 1; 250 static constexpr GLuint LOC_DST_OFFSET = 1;
252 251
253 program_manager.BindHostCompute(copy_bc4_program.handle); 252 program_manager.BindProgram(copy_bc4_program.handle);
254 253
255 for (const ImageCopy& copy : copies) { 254 for (const ImageCopy& copy : copies) {
256 ASSERT(copy.src_subresource.base_layer == 0); 255 ASSERT(copy.src_subresource.base_layer == 0);
@@ -286,7 +285,7 @@ void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
286 break; 285 break;
287 case 4: { 286 case 4: {
288 // BGRA8 copy 287 // BGRA8 copy
289 program_manager.BindHostCompute(copy_bgra_program.handle); 288 program_manager.BindProgram(copy_bgra_program.handle);
290 constexpr GLenum FORMAT = GL_RGBA8; 289 constexpr GLenum FORMAT = GL_RGBA8;
291 for (const ImageCopy& copy : copies) { 290 for (const ImageCopy& copy : copies) {
292 ASSERT(copy.src_offset == zero_offset); 291 ASSERT(copy.src_offset == zero_offset);
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h
index dd7d2cc0c..c6e5e059b 100644
--- a/src/video_core/renderer_vulkan/pipeline_helper.h
+++ b/src/video_core/renderer_vulkan/pipeline_helper.h
@@ -19,23 +19,6 @@
19 19
20namespace Vulkan { 20namespace Vulkan {
21 21
22struct TextureHandle {
23 explicit TextureHandle(u32 data, bool via_header_index) {
24 [[likely]] if (via_header_index) {
25 image = data;
26 sampler = data;
27 }
28 else {
29 const Tegra::Texture::TextureHandle handle{data};
30 image = handle.tic_id;
31 sampler = via_header_index ? image : handle.tsc_id.Value();
32 }
33 }
34
35 u32 image;
36 u32 sampler;
37};
38
39class DescriptorLayoutBuilder { 22class DescriptorLayoutBuilder {
40public: 23public:
41 DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {} 24 DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index c52001b5a..c27402ff0 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -140,8 +140,8 @@ struct BufferCacheParams {
140 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false; 140 static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false;
141 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; 141 static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
142 static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; 142 static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
143 static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = false;
144 static constexpr bool USE_MEMORY_MAPS = true; 143 static constexpr bool USE_MEMORY_MAPS = true;
144 static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
145}; 145};
146 146
147using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; 147using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index feaace0c5..168ffa7e9 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -18,6 +18,9 @@
18 18
19namespace Vulkan { 19namespace Vulkan {
20 20
21using Shader::ImageBufferDescriptor;
22using Tegra::Texture::TexturePair;
23
21ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, 24ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
22 VKUpdateDescriptorQueue& update_descriptor_queue_, 25 VKUpdateDescriptorQueue& update_descriptor_queue_,
23 Common::ThreadWorker* thread_worker, const Shader::Info& info_, 26 Common::ThreadWorker* thread_worker, const Shader::Info& info_,
@@ -106,25 +109,25 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
106 secondary_offset}; 109 secondary_offset};
107 const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; 110 const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
108 const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; 111 const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
109 return TextureHandle{lhs_raw | rhs_raw, via_header_index}; 112 return TexturePair(lhs_raw | rhs_raw, via_header_index);
110 } 113 }
111 } 114 }
112 return TextureHandle{gpu_memory.Read<u32>(addr), via_header_index}; 115 return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
113 }}; 116 }};
114 const auto add_image{[&](const auto& desc) { 117 const auto add_image{[&](const auto& desc) {
115 for (u32 index = 0; index < desc.count; ++index) { 118 for (u32 index = 0; index < desc.count; ++index) {
116 const TextureHandle handle{read_handle(desc, index)}; 119 const auto handle{read_handle(desc, index)};
117 image_view_indices.push_back(handle.image); 120 image_view_indices.push_back(handle.first);
118 } 121 }
119 }}; 122 }};
120 std::ranges::for_each(info.texture_buffer_descriptors, add_image); 123 std::ranges::for_each(info.texture_buffer_descriptors, add_image);
121 std::ranges::for_each(info.image_buffer_descriptors, add_image); 124 std::ranges::for_each(info.image_buffer_descriptors, add_image);
122 for (const auto& desc : info.texture_descriptors) { 125 for (const auto& desc : info.texture_descriptors) {
123 for (u32 index = 0; index < desc.count; ++index) { 126 for (u32 index = 0; index < desc.count; ++index) {
124 const TextureHandle handle{read_handle(desc, index)}; 127 const auto handle{read_handle(desc, index)};
125 image_view_indices.push_back(handle.image); 128 image_view_indices.push_back(handle.first);
126 129
127 Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); 130 Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
128 samplers.push_back(sampler->Handle()); 131 samplers.push_back(sampler->Handle());
129 } 132 }
130 } 133 }
@@ -137,15 +140,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
137 ImageId* texture_buffer_ids{image_view_ids.data()}; 140 ImageId* texture_buffer_ids{image_view_ids.data()};
138 size_t index{}; 141 size_t index{};
139 const auto add_buffer{[&](const auto& desc) { 142 const auto add_buffer{[&](const auto& desc) {
143 constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
140 for (u32 i = 0; i < desc.count; ++i) { 144 for (u32 i = 0; i < desc.count; ++i) {
141 bool is_written{false}; 145 bool is_written{false};
142 if constexpr (std::is_same_v<decltype(desc), const Shader::ImageBufferDescriptor&>) { 146 if constexpr (is_image) {
143 is_written = desc.is_written; 147 is_written = desc.is_written;
144 } 148 }
145 ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); 149 ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids);
146 buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), 150 buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
147 image_view.BufferSize(), image_view.format, 151 image_view.BufferSize(), image_view.format,
148 is_written); 152 is_written, is_image);
149 ++texture_buffer_ids; 153 ++texture_buffer_ids;
150 ++index; 154 ++index;
151 } 155 }
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 9f5d30fe8..e5f54a84f 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -19,7 +19,7 @@
19#include "video_core/renderer_vulkan/vk_update_descriptor.h" 19#include "video_core/renderer_vulkan/vk_update_descriptor.h"
20#include "video_core/vulkan_common/vulkan_device.h" 20#include "video_core/vulkan_common/vulkan_device.h"
21 21
22#ifdef _MSC_VER 22#if defined(_MSC_VER) && defined(NDEBUG)
23#define LAMBDA_FORCEINLINE [[msvc::forceinline]] 23#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
24#else 24#else
25#define LAMBDA_FORCEINLINE 25#define LAMBDA_FORCEINLINE
@@ -30,6 +30,7 @@ namespace {
30using boost::container::small_vector; 30using boost::container::small_vector;
31using boost::container::static_vector; 31using boost::container::static_vector;
32using Shader::ImageBufferDescriptor; 32using Shader::ImageBufferDescriptor;
33using Tegra::Texture::TexturePair;
33using VideoCore::Surface::PixelFormat; 34using VideoCore::Surface::PixelFormat;
34using VideoCore::Surface::PixelFormatFromDepthFormat; 35using VideoCore::Surface::PixelFormatFromDepthFormat;
35using VideoCore::Surface::PixelFormatFromRenderTargetFormat; 36using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
@@ -289,15 +290,15 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
289 const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; 290 const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
290 const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; 291 const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
291 const u32 raw{lhs_raw | rhs_raw}; 292 const u32 raw{lhs_raw | rhs_raw};
292 return TextureHandle{raw, via_header_index}; 293 return TexturePair(raw, via_header_index);
293 } 294 }
294 } 295 }
295 return TextureHandle{gpu_memory.Read<u32>(addr), via_header_index}; 296 return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
296 }}; 297 }};
297 const auto add_image{[&](const auto& desc) { 298 const auto add_image{[&](const auto& desc) {
298 for (u32 index = 0; index < desc.count; ++index) { 299 for (u32 index = 0; index < desc.count; ++index) {
299 const TextureHandle handle{read_handle(desc, index)}; 300 const auto handle{read_handle(desc, index)};
300 image_view_indices[image_index++] = handle.image; 301 image_view_indices[image_index++] = handle.first;
301 } 302 }
302 }}; 303 }};
303 if constexpr (Spec::has_texture_buffers) { 304 if constexpr (Spec::has_texture_buffers) {
@@ -312,10 +313,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
312 } 313 }
313 for (const auto& desc : info.texture_descriptors) { 314 for (const auto& desc : info.texture_descriptors) {
314 for (u32 index = 0; index < desc.count; ++index) { 315 for (u32 index = 0; index < desc.count; ++index) {
315 const TextureHandle handle{read_handle(desc, index)}; 316 const auto handle{read_handle(desc, index)};
316 image_view_indices[image_index++] = handle.image; 317 image_view_indices[image_index++] = handle.first;
317 318
318 Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; 319 Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
319 samplers[sampler_index++] = sampler->Handle(); 320 samplers[sampler_index++] = sampler->Handle();
320 } 321 }
321 } 322 }
@@ -347,15 +348,16 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
347 const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { 348 const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
348 size_t index{}; 349 size_t index{};
349 const auto add_buffer{[&](const auto& desc) { 350 const auto add_buffer{[&](const auto& desc) {
351 constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
350 for (u32 i = 0; i < desc.count; ++i) { 352 for (u32 i = 0; i < desc.count; ++i) {
351 bool is_written{false}; 353 bool is_written{false};
352 if constexpr (std::is_same_v<decltype(desc), const ImageBufferDescriptor&>) { 354 if constexpr (is_image) {
353 is_written = desc.is_written; 355 is_written = desc.is_written;
354 } 356 }
355 ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; 357 ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
356 buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), 358 buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
357 image_view.BufferSize(), image_view.format, 359 image_view.BufferSize(), image_view.format,
358 is_written); 360 is_written, is_image);
359 ++index; 361 ++index;
360 ++texture_buffer_index; 362 ++texture_buffer_index;
361 } 363 }
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 1334882b5..30b71bdbc 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -342,28 +342,15 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
342} 342}
343 343
344std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() { 344std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
345 main_pools.ReleaseContents(); 345 GraphicsEnvironments environments;
346 346 GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
347 std::array<GraphicsEnvironment, Maxwell::MaxShaderProgram> graphics_envs;
348 boost::container::static_vector<Shader::Environment*, Maxwell::MaxShaderProgram> envs;
349 347
350 const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; 348 main_pools.ReleaseContents();
351 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 349 auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)};
352 if (graphics_key.unique_hashes[index] == 0) {
353 continue;
354 }
355 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
356 auto& env{graphics_envs[index]};
357 const u32 start_address{maxwell3d.regs.shader_config[index].offset};
358 env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
359 env.SetCachedSize(shader_infos[index]->size_bytes);
360 envs.push_back(&env);
361 }
362 auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)};
363 if (pipeline_cache_filename.empty()) { 350 if (pipeline_cache_filename.empty()) {
364 return pipeline; 351 return pipeline;
365 } 352 }
366 serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(graphics_envs)] { 353 serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] {
367 boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> 354 boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram>
368 env_ptrs; 355 env_ptrs;
369 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 356 for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 0f15ad2f7..ef14e91e7 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -96,17 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
96 return scissor; 96 return scissor;
97} 97}
98 98
99struct TextureHandle {
100 constexpr TextureHandle(u32 data, bool via_header_index) {
101 const Tegra::Texture::TextureHandle handle{data};
102 image = handle.tic_id;
103 sampler = via_header_index ? image : handle.tsc_id.Value();
104 }
105
106 u32 image;
107 u32 sampler;
108};
109
110DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, 99DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
111 bool is_indexed) { 100 bool is_indexed) {
112 DrawParams params{ 101 DrawParams params{
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index b8b8eace5..78bf90c48 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -91,6 +91,23 @@ const ShaderInfo* ShaderCache::ComputeShader() {
91 return MakeShaderInfo(env, *cpu_shader_addr); 91 return MakeShaderInfo(env, *cpu_shader_addr);
92} 92}
93 93
94void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result,
95 const std::array<u64, NUM_PROGRAMS>& unique_hashes) {
96 size_t env_index{};
97 const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
98 for (size_t index = 0; index < NUM_PROGRAMS; ++index) {
99 if (unique_hashes[index] == 0) {
100 continue;
101 }
102 const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
103 auto& env{result.envs[index]};
104 const u32 start_address{maxwell3d.regs.shader_config[index].offset};
105 env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
106 env.SetCachedSize(shader_infos[index]->size_bytes);
107 result.env_ptrs[env_index++] = &env;
108 }
109}
110
94ShaderInfo* ShaderCache::TryGet(VAddr addr) const { 111ShaderInfo* ShaderCache::TryGet(VAddr addr) const {
95 std::scoped_lock lock{lookup_mutex}; 112 std::scoped_lock lock{lookup_mutex};
96 113
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index 89a4bcc84..136fe294c 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -4,14 +4,18 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <algorithm>
8#include <array>
7#include <memory> 9#include <memory>
8#include <mutex> 10#include <mutex>
11#include <span>
9#include <unordered_map> 12#include <unordered_map>
10#include <utility> 13#include <utility>
11#include <vector> 14#include <vector>
12 15
13#include "common/common_types.h" 16#include "common/common_types.h"
14#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
18#include "video_core/shader_environment.h"
15 19
16namespace Tegra { 20namespace Tegra {
17class MemoryManager; 21class MemoryManager;
@@ -30,6 +34,8 @@ class ShaderCache {
30 static constexpr u64 PAGE_BITS = 14; 34 static constexpr u64 PAGE_BITS = 14;
31 static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; 35 static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS;
32 36
37 static constexpr size_t NUM_PROGRAMS = 6;
38
33 struct Entry { 39 struct Entry {
34 VAddr addr_start; 40 VAddr addr_start;
35 VAddr addr_end; 41 VAddr addr_end;
@@ -58,6 +64,15 @@ public:
58 void SyncGuestHost(); 64 void SyncGuestHost();
59 65
60protected: 66protected:
67 struct GraphicsEnvironments {
68 std::array<GraphicsEnvironment, NUM_PROGRAMS> envs;
69 std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs;
70
71 std::span<Shader::Environment* const> Span() const noexcept {
72 return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr));
73 }
74 };
75
61 explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, 76 explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
62 Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, 77 Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
63 Tegra::Engines::KeplerCompute& kepler_compute_); 78 Tegra::Engines::KeplerCompute& kepler_compute_);
@@ -65,17 +80,21 @@ protected:
65 /// @brief Update the hashes and information of shader stages 80 /// @brief Update the hashes and information of shader stages
66 /// @param unique_hashes Shader hashes to store into when a stage is enabled 81 /// @param unique_hashes Shader hashes to store into when a stage is enabled
67 /// @return True no success, false on error 82 /// @return True no success, false on error
68 bool RefreshStages(std::array<u64, 6>& unique_hashes); 83 bool RefreshStages(std::array<u64, NUM_PROGRAMS>& unique_hashes);
69 84
70 /// @brief Returns information about the current compute shader 85 /// @brief Returns information about the current compute shader
71 /// @return Pointer to a valid shader, nullptr on error 86 /// @return Pointer to a valid shader, nullptr on error
72 const ShaderInfo* ComputeShader(); 87 const ShaderInfo* ComputeShader();
73 88
89 /// @brief Collect the current graphics environments
90 void GetGraphicsEnvironments(GraphicsEnvironments& result,
91 const std::array<u64, NUM_PROGRAMS>& unique_hashes);
92
74 Tegra::MemoryManager& gpu_memory; 93 Tegra::MemoryManager& gpu_memory;
75 Tegra::Engines::Maxwell3D& maxwell3d; 94 Tegra::Engines::Maxwell3D& maxwell3d;
76 Tegra::Engines::KeplerCompute& kepler_compute; 95 Tegra::Engines::KeplerCompute& kepler_compute;
77 96
78 std::array<const ShaderInfo*, 6> shader_infos{}; 97 std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{};
79 bool last_shaders_valid = false; 98 bool last_shaders_valid = false;
80 99
81private: 100private:
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index 5dccc0097..c93174519 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
@@ -187,8 +187,8 @@ std::optional<u64> GenericEnvironment::TryFindSize() {
187 187
188Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, 188Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit,
189 bool via_header_index, u32 raw) { 189 bool via_header_index, u32 raw) {
190 const TextureHandle handle{raw, via_header_index}; 190 const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
191 const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; 191 const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
192 Tegra::Texture::TICEntry entry; 192 Tegra::Texture::TICEntry entry;
193 gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); 193 gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
194 const Shader::TextureType result{ConvertType(entry)}; 194 const Shader::TextureType result{ConvertType(entry)};
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
index 37d712045..d26dbfaab 100644
--- a/src/video_core/shader_environment.h
+++ b/src/video_core/shader_environment.h
@@ -29,22 +29,6 @@ class Memorymanager;
29 29
30namespace VideoCommon { 30namespace VideoCommon {
31 31
32struct TextureHandle {
33 explicit TextureHandle(u32 data, bool via_header_index) {
34 if (via_header_index) {
35 image = data;
36 sampler = data;
37 } else {
38 const Tegra::Texture::TextureHandle handle{data};
39 image = handle.tic_id;
40 sampler = via_header_index ? image : handle.tsc_id.Value();
41 }
42 }
43
44 u32 image;
45 u32 sampler;
46};
47
48class GenericEnvironment : public Shader::Environment { 32class GenericEnvironment : public Shader::Environment {
49public: 33public:
50 explicit GenericEnvironment() = default; 34 explicit GenericEnvironment() = default;
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp
index d10ba4ccd..249cc4d0f 100644
--- a/src/video_core/texture_cache/formatter.cpp
+++ b/src/video_core/texture_cache/formatter.cpp
@@ -43,7 +43,7 @@ std::string Name(const ImageBase& image) {
43 return "Invalid"; 43 return "Invalid";
44} 44}
45 45
46std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) { 46std::string Name(const ImageViewBase& image_view) {
47 const u32 width = image_view.size.width; 47 const u32 width = image_view.size.width;
48 const u32 height = image_view.size.height; 48 const u32 height = image_view.size.height;
49 const u32 depth = image_view.size.depth; 49 const u32 depth = image_view.size.depth;
@@ -51,7 +51,7 @@ std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> t
51 const u32 num_layers = image_view.range.extent.layers; 51 const u32 num_layers = image_view.range.extent.layers;
52 52
53 const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : ""; 53 const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : "";
54 switch (type.value_or(image_view.type)) { 54 switch (image_view.type) {
55 case ImageViewType::e1D: 55 case ImageViewType::e1D:
56 return fmt::format("ImageView 1D {}{}", width, level); 56 return fmt::format("ImageView 1D {}{}", width, level);
57 case ImageViewType::e2D: 57 case ImageViewType::e2D:
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
index a48413983..c6cf0583f 100644
--- a/src/video_core/texture_cache/formatter.h
+++ b/src/video_core/texture_cache/formatter.h
@@ -255,8 +255,7 @@ struct RenderTargets;
255 255
256[[nodiscard]] std::string Name(const ImageBase& image); 256[[nodiscard]] std::string Name(const ImageBase& image);
257 257
258[[nodiscard]] std::string Name(const ImageViewBase& image_view, 258[[nodiscard]] std::string Name(const ImageViewBase& image_view);
259 std::optional<ImageViewType> type = std::nullopt);
260 259
261[[nodiscard]] std::string Name(const RenderTargets& render_targets); 260[[nodiscard]] std::string Name(const RenderTargets& render_targets);
262 261
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index c1d14335e..1a9399455 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -154,6 +154,15 @@ union TextureHandle {
154}; 154};
155static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); 155static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
156 156
157[[nodiscard]] inline std::pair<u32, u32> TexturePair(u32 raw, bool via_header_index) {
158 if (via_header_index) {
159 return {raw, raw};
160 } else {
161 const Tegra::Texture::TextureHandle handle{raw};
162 return {handle.tic_id, via_header_index ? handle.tic_id : handle.tsc_id};
163 }
164}
165
157struct TICEntry { 166struct TICEntry {
158 union { 167 union {
159 struct { 168 struct {
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 2318c1bda..e27a2b51e 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -282,7 +282,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
282 VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ 282 VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{
283 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, 283 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR,
284 .pNext = nullptr, 284 .pNext = nullptr,
285 .storageBuffer16BitAccess = false, 285 .storageBuffer16BitAccess = true,
286 .uniformAndStorageBuffer16BitAccess = true, 286 .uniformAndStorageBuffer16BitAccess = true,
287 .storagePushConstant16 = false, 287 .storagePushConstant16 = false,
288 .storageInputOutput16 = false, 288 .storageInputOutput16 = false,