diff options
| author | 2021-05-23 04:28:34 -0300 | |
|---|---|---|
| committer | 2021-07-22 21:51:30 -0400 | |
| commit | d621e96d0de212cc16897eadf71e8a1b2e1eb5dc (patch) | |
| tree | 8695f2f4dddf2564b63e4574d6616ccb0e79568c /src | |
| parent | spirv: Be aware of NAN unaware drivers (diff) | |
| download | yuzu-d621e96d0de212cc16897eadf71e8a1b2e1eb5dc.tar.gz yuzu-d621e96d0de212cc16897eadf71e8a1b2e1eb5dc.tar.xz yuzu-d621e96d0de212cc16897eadf71e8a1b2e1eb5dc.zip | |
shader: Initial OpenGL implementation
Diffstat (limited to 'src')
38 files changed, 1427 insertions, 705 deletions
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index b3c9fe72a..5913fdeff 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -355,6 +355,10 @@ U32 IREmitter::WorkgroupIdZ() { | |||
| 355 | return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)}; | 355 | return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)}; |
| 356 | } | 356 | } |
| 357 | 357 | ||
| 358 | Value IREmitter::LocalInvocationId() { | ||
| 359 | return Inst(Opcode::LocalInvocationId); | ||
| 360 | } | ||
| 361 | |||
| 358 | U32 IREmitter::LocalInvocationIdX() { | 362 | U32 IREmitter::LocalInvocationIdX() { |
| 359 | return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)}; | 363 | return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)}; |
| 360 | } | 364 | } |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 4441c495d..a12919283 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -95,6 +95,7 @@ public: | |||
| 95 | [[nodiscard]] U32 WorkgroupIdY(); | 95 | [[nodiscard]] U32 WorkgroupIdY(); |
| 96 | [[nodiscard]] U32 WorkgroupIdZ(); | 96 | [[nodiscard]] U32 WorkgroupIdZ(); |
| 97 | 97 | ||
| 98 | [[nodiscard]] Value LocalInvocationId(); | ||
| 98 | [[nodiscard]] U32 LocalInvocationIdX(); | 99 | [[nodiscard]] U32 LocalInvocationIdX(); |
| 99 | [[nodiscard]] U32 LocalInvocationIdY(); | 100 | [[nodiscard]] U32 LocalInvocationIdY(); |
| 100 | [[nodiscard]] U32 LocalInvocationIdZ(); | 101 | [[nodiscard]] U32 LocalInvocationIdZ(); |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index b0baff74b..01fb6f5e5 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp | |||
| @@ -120,6 +120,13 @@ enum class SpecialRegister : u64 { | |||
| 120 | case SpecialRegister::SR_INVOCATION_INFO: | 120 | case SpecialRegister::SR_INVOCATION_INFO: |
| 121 | // LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed"); | 121 | // LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed"); |
| 122 | return ir.Imm32(0x00ff'0000); | 122 | return ir.Imm32(0x00ff'0000); |
| 123 | case SpecialRegister::SR_TID: { | ||
| 124 | const IR::Value tid{ir.LocalInvocationId()}; | ||
| 125 | return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)}, | ||
| 126 | IR::U32{ir.CompositeExtract(tid, 1)}, | ||
| 127 | ir.Imm32(16), ir.Imm32(8)), | ||
| 128 | IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6)); | ||
| 129 | } | ||
| 123 | case SpecialRegister::SR_TID_X: | 130 | case SpecialRegister::SR_TID_X: |
| 124 | return ir.LocalInvocationIdX(); | 131 | return ir.LocalInvocationIdX(); |
| 125 | case SpecialRegister::SR_TID_Y: | 132 | case SpecialRegister::SR_TID_Y: |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6e0e4b8f5..b008c37c0 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -67,10 +67,14 @@ add_library(video_core STATIC | |||
| 67 | renderer_base.h | 67 | renderer_base.h |
| 68 | renderer_opengl/gl_buffer_cache.cpp | 68 | renderer_opengl/gl_buffer_cache.cpp |
| 69 | renderer_opengl/gl_buffer_cache.h | 69 | renderer_opengl/gl_buffer_cache.h |
| 70 | renderer_opengl/gl_compute_program.cpp | ||
| 71 | renderer_opengl/gl_compute_program.h | ||
| 70 | renderer_opengl/gl_device.cpp | 72 | renderer_opengl/gl_device.cpp |
| 71 | renderer_opengl/gl_device.h | 73 | renderer_opengl/gl_device.h |
| 72 | renderer_opengl/gl_fence_manager.cpp | 74 | renderer_opengl/gl_fence_manager.cpp |
| 73 | renderer_opengl/gl_fence_manager.h | 75 | renderer_opengl/gl_fence_manager.h |
| 76 | renderer_opengl/gl_graphics_program.cpp | ||
| 77 | renderer_opengl/gl_graphics_program.h | ||
| 74 | renderer_opengl/gl_rasterizer.cpp | 78 | renderer_opengl/gl_rasterizer.cpp |
| 75 | renderer_opengl/gl_rasterizer.h | 79 | renderer_opengl/gl_rasterizer.h |
| 76 | renderer_opengl/gl_resource_manager.cpp | 80 | renderer_opengl/gl_resource_manager.cpp |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 29746f61d..6c92e4c30 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -70,8 +70,8 @@ class BufferCache { | |||
| 70 | P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; | 70 | P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; |
| 71 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; | 71 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; |
| 72 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; | 72 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; |
| 73 | static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = P::NEEDS_BIND_TEXTURE_BUFFER_INDEX; | ||
| 74 | static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; | 73 | static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; |
| 74 | static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; | ||
| 75 | 75 | ||
| 76 | static constexpr BufferId NULL_BUFFER_ID{0}; | 76 | static constexpr BufferId NULL_BUFFER_ID{0}; |
| 77 | 77 | ||
| @@ -154,7 +154,7 @@ public: | |||
| 154 | void UnbindGraphicsTextureBuffers(size_t stage); | 154 | void UnbindGraphicsTextureBuffers(size_t stage); |
| 155 | 155 | ||
| 156 | void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, | 156 | void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, |
| 157 | PixelFormat format, bool is_written); | 157 | PixelFormat format, bool is_written, bool is_image); |
| 158 | 158 | ||
| 159 | void UnbindComputeStorageBuffers(); | 159 | void UnbindComputeStorageBuffers(); |
| 160 | 160 | ||
| @@ -164,7 +164,7 @@ public: | |||
| 164 | void UnbindComputeTextureBuffers(); | 164 | void UnbindComputeTextureBuffers(); |
| 165 | 165 | ||
| 166 | void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, | 166 | void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, |
| 167 | bool is_written); | 167 | bool is_written, bool is_image); |
| 168 | 168 | ||
| 169 | void FlushCachedWrites(); | 169 | void FlushCachedWrites(); |
| 170 | 170 | ||
| @@ -197,6 +197,7 @@ public: | |||
| 197 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); | 197 | [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); |
| 198 | 198 | ||
| 199 | std::mutex mutex; | 199 | std::mutex mutex; |
| 200 | Runtime& runtime; | ||
| 200 | 201 | ||
| 201 | private: | 202 | private: |
| 202 | template <typename Func> | 203 | template <typename Func> |
| @@ -366,7 +367,6 @@ private: | |||
| 366 | Tegra::Engines::KeplerCompute& kepler_compute; | 367 | Tegra::Engines::KeplerCompute& kepler_compute; |
| 367 | Tegra::MemoryManager& gpu_memory; | 368 | Tegra::MemoryManager& gpu_memory; |
| 368 | Core::Memory::Memory& cpu_memory; | 369 | Core::Memory::Memory& cpu_memory; |
| 369 | Runtime& runtime; | ||
| 370 | 370 | ||
| 371 | SlotVector<Buffer> slot_buffers; | 371 | SlotVector<Buffer> slot_buffers; |
| 372 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; | 372 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; |
| @@ -394,8 +394,10 @@ private: | |||
| 394 | 394 | ||
| 395 | std::array<u32, NUM_STAGES> enabled_texture_buffers{}; | 395 | std::array<u32, NUM_STAGES> enabled_texture_buffers{}; |
| 396 | std::array<u32, NUM_STAGES> written_texture_buffers{}; | 396 | std::array<u32, NUM_STAGES> written_texture_buffers{}; |
| 397 | std::array<u32, NUM_STAGES> image_texture_buffers{}; | ||
| 397 | u32 enabled_compute_texture_buffers = 0; | 398 | u32 enabled_compute_texture_buffers = 0; |
| 398 | u32 written_compute_texture_buffers = 0; | 399 | u32 written_compute_texture_buffers = 0; |
| 400 | u32 image_compute_texture_buffers = 0; | ||
| 399 | 401 | ||
| 400 | std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; | 402 | std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; |
| 401 | 403 | ||
| @@ -431,8 +433,8 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | |||
| 431 | Tegra::Engines::KeplerCompute& kepler_compute_, | 433 | Tegra::Engines::KeplerCompute& kepler_compute_, |
| 432 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | 434 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, |
| 433 | Runtime& runtime_) | 435 | Runtime& runtime_) |
| 434 | : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, | 436 | : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, |
| 435 | gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { | 437 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { |
| 436 | // Ensure the first slot is used for the null buffer | 438 | // Ensure the first slot is used for the null buffer |
| 437 | void(slot_buffers.insert(runtime, NullBufferParams{})); | 439 | void(slot_buffers.insert(runtime, NullBufferParams{})); |
| 438 | deletion_iterator = slot_buffers.end(); | 440 | deletion_iterator = slot_buffers.end(); |
| @@ -703,13 +705,18 @@ template <class P> | |||
| 703 | void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) { | 705 | void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) { |
| 704 | enabled_texture_buffers[stage] = 0; | 706 | enabled_texture_buffers[stage] = 0; |
| 705 | written_texture_buffers[stage] = 0; | 707 | written_texture_buffers[stage] = 0; |
| 708 | image_texture_buffers[stage] = 0; | ||
| 706 | } | 709 | } |
| 707 | 710 | ||
| 708 | template <class P> | 711 | template <class P> |
| 709 | void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, | 712 | void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, |
| 710 | u32 size, PixelFormat format, bool is_written) { | 713 | u32 size, PixelFormat format, bool is_written, |
| 714 | bool is_image) { | ||
| 711 | enabled_texture_buffers[stage] |= 1U << tbo_index; | 715 | enabled_texture_buffers[stage] |= 1U << tbo_index; |
| 712 | written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; | 716 | written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; |
| 717 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { | ||
| 718 | image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index; | ||
| 719 | } | ||
| 713 | texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); | 720 | texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); |
| 714 | } | 721 | } |
| 715 | 722 | ||
| @@ -717,6 +724,7 @@ template <class P> | |||
| 717 | void BufferCache<P>::UnbindComputeStorageBuffers() { | 724 | void BufferCache<P>::UnbindComputeStorageBuffers() { |
| 718 | enabled_compute_storage_buffers = 0; | 725 | enabled_compute_storage_buffers = 0; |
| 719 | written_compute_storage_buffers = 0; | 726 | written_compute_storage_buffers = 0; |
| 727 | image_compute_texture_buffers = 0; | ||
| 720 | } | 728 | } |
| 721 | 729 | ||
| 722 | template <class P> | 730 | template <class P> |
| @@ -737,13 +745,17 @@ template <class P> | |||
| 737 | void BufferCache<P>::UnbindComputeTextureBuffers() { | 745 | void BufferCache<P>::UnbindComputeTextureBuffers() { |
| 738 | enabled_compute_texture_buffers = 0; | 746 | enabled_compute_texture_buffers = 0; |
| 739 | written_compute_texture_buffers = 0; | 747 | written_compute_texture_buffers = 0; |
| 748 | image_compute_texture_buffers = 0; | ||
| 740 | } | 749 | } |
| 741 | 750 | ||
| 742 | template <class P> | 751 | template <class P> |
| 743 | void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, | 752 | void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, |
| 744 | PixelFormat format, bool is_written) { | 753 | PixelFormat format, bool is_written, bool is_image) { |
| 745 | enabled_compute_texture_buffers |= 1U << tbo_index; | 754 | enabled_compute_texture_buffers |= 1U << tbo_index; |
| 746 | written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; | 755 | written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; |
| 756 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { | ||
| 757 | image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; | ||
| 758 | } | ||
| 747 | compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); | 759 | compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); |
| 748 | } | 760 | } |
| 749 | 761 | ||
| @@ -1057,7 +1069,6 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { | |||
| 1057 | 1069 | ||
| 1058 | template <class P> | 1070 | template <class P> |
| 1059 | void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { | 1071 | void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { |
| 1060 | u32 binding_index = 0; | ||
| 1061 | ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { | 1072 | ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { |
| 1062 | const TextureBufferBinding& binding = texture_buffers[stage][index]; | 1073 | const TextureBufferBinding& binding = texture_buffers[stage][index]; |
| 1063 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 1074 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| @@ -1066,9 +1077,12 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { | |||
| 1066 | 1077 | ||
| 1067 | const u32 offset = buffer.Offset(binding.cpu_addr); | 1078 | const u32 offset = buffer.Offset(binding.cpu_addr); |
| 1068 | const PixelFormat format = binding.format; | 1079 | const PixelFormat format = binding.format; |
| 1069 | if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { | 1080 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { |
| 1070 | runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); | 1081 | if (((image_texture_buffers[stage] >> index) & 1) != 0) { |
| 1071 | ++binding_index; | 1082 | runtime.BindImageBuffer(buffer, offset, size, format); |
| 1083 | } else { | ||
| 1084 | runtime.BindTextureBuffer(buffer, offset, size, format); | ||
| 1085 | } | ||
| 1072 | } else { | 1086 | } else { |
| 1073 | runtime.BindTextureBuffer(buffer, offset, size, format); | 1087 | runtime.BindTextureBuffer(buffer, offset, size, format); |
| 1074 | } | 1088 | } |
| @@ -1139,7 +1153,6 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { | |||
| 1139 | 1153 | ||
| 1140 | template <class P> | 1154 | template <class P> |
| 1141 | void BufferCache<P>::BindHostComputeTextureBuffers() { | 1155 | void BufferCache<P>::BindHostComputeTextureBuffers() { |
| 1142 | u32 binding_index = 0; | ||
| 1143 | ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { | 1156 | ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { |
| 1144 | const TextureBufferBinding& binding = compute_texture_buffers[index]; | 1157 | const TextureBufferBinding& binding = compute_texture_buffers[index]; |
| 1145 | Buffer& buffer = slot_buffers[binding.buffer_id]; | 1158 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| @@ -1148,9 +1161,12 @@ void BufferCache<P>::BindHostComputeTextureBuffers() { | |||
| 1148 | 1161 | ||
| 1149 | const u32 offset = buffer.Offset(binding.cpu_addr); | 1162 | const u32 offset = buffer.Offset(binding.cpu_addr); |
| 1150 | const PixelFormat format = binding.format; | 1163 | const PixelFormat format = binding.format; |
| 1151 | if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { | 1164 | if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { |
| 1152 | runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); | 1165 | if (((image_compute_texture_buffers >> index) & 1) != 0) { |
| 1153 | ++binding_index; | 1166 | runtime.BindImageBuffer(buffer, offset, size, format); |
| 1167 | } else { | ||
| 1168 | runtime.BindTextureBuffer(buffer, offset, size, format); | ||
| 1169 | } | ||
| 1154 | } else { | 1170 | } else { |
| 1155 | runtime.BindTextureBuffer(buffer, offset, size, format); | 1171 | runtime.BindTextureBuffer(buffer, offset, size, format); |
| 1156 | } | 1172 | } |
| @@ -1339,11 +1355,10 @@ void BufferCache<P>::UpdateComputeStorageBuffers() { | |||
| 1339 | ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { | 1355 | ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { |
| 1340 | // Resolve buffer | 1356 | // Resolve buffer |
| 1341 | Binding& binding = compute_storage_buffers[index]; | 1357 | Binding& binding = compute_storage_buffers[index]; |
| 1342 | const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); | 1358 | binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); |
| 1343 | binding.buffer_id = buffer_id; | ||
| 1344 | // Mark as written if needed | 1359 | // Mark as written if needed |
| 1345 | if (((written_compute_storage_buffers >> index) & 1) != 0) { | 1360 | if (((written_compute_storage_buffers >> index) & 1) != 0) { |
| 1346 | MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); | 1361 | MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); |
| 1347 | } | 1362 | } |
| 1348 | }); | 1363 | }); |
| 1349 | } | 1364 | } |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index c4189fb60..2d0ef1307 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -2,14 +2,18 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | ||
| 5 | #include <span> | 6 | #include <span> |
| 6 | 7 | ||
| 7 | #include "video_core/buffer_cache/buffer_cache.h" | 8 | #include "video_core/buffer_cache/buffer_cache.h" |
| 8 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 9 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 9 | #include "video_core/renderer_opengl/gl_device.h" | 10 | #include "video_core/renderer_opengl/gl_device.h" |
| 11 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | ||
| 10 | 12 | ||
| 11 | namespace OpenGL { | 13 | namespace OpenGL { |
| 12 | namespace { | 14 | namespace { |
| 15 | using VideoCore::Surface::PixelFormat; | ||
| 16 | |||
| 13 | struct BindlessSSBO { | 17 | struct BindlessSSBO { |
| 14 | GLuint64EXT address; | 18 | GLuint64EXT address; |
| 15 | GLsizei length; | 19 | GLsizei length; |
| @@ -62,6 +66,26 @@ void Buffer::MakeResident(GLenum access) noexcept { | |||
| 62 | glMakeNamedBufferResidentNV(buffer.handle, access); | 66 | glMakeNamedBufferResidentNV(buffer.handle, access); |
| 63 | } | 67 | } |
| 64 | 68 | ||
| 69 | GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { | ||
| 70 | const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { | ||
| 71 | return offset == view.offset && size == view.size && format == view.format; | ||
| 72 | })}; | ||
| 73 | if (it != views.end()) { | ||
| 74 | return it->texture.handle; | ||
| 75 | } | ||
| 76 | OGLTexture texture; | ||
| 77 | texture.Create(GL_TEXTURE_BUFFER); | ||
| 78 | const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format}; | ||
| 79 | glTextureBufferRange(texture.handle, gl_format, buffer.handle, offset, size); | ||
| 80 | views.push_back({ | ||
| 81 | .offset = offset, | ||
| 82 | .size = size, | ||
| 83 | .format = format, | ||
| 84 | .texture = std::move(texture), | ||
| 85 | }); | ||
| 86 | return views.back().texture.handle; | ||
| 87 | } | ||
| 88 | |||
| 65 | BufferCacheRuntime::BufferCacheRuntime(const Device& device_) | 89 | BufferCacheRuntime::BufferCacheRuntime(const Device& device_) |
| 66 | : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, | 90 | : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, |
| 67 | use_assembly_shaders{device.UseAssemblyShaders()}, | 91 | use_assembly_shaders{device.UseAssemblyShaders()}, |
| @@ -144,7 +168,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff | |||
| 144 | glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, | 168 | glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, |
| 145 | static_cast<GLsizeiptr>(size)); | 169 | static_cast<GLsizeiptr>(size)); |
| 146 | } else { | 170 | } else { |
| 147 | const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | 171 | const GLuint base_binding = graphics_base_uniform_bindings[stage]; |
| 148 | const GLuint binding = base_binding + binding_index; | 172 | const GLuint binding = base_binding + binding_index; |
| 149 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(), | 173 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(), |
| 150 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | 174 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); |
| @@ -181,7 +205,7 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff | |||
| 181 | glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, | 205 | glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, |
| 182 | reinterpret_cast<const GLuint*>(&ssbo)); | 206 | reinterpret_cast<const GLuint*>(&ssbo)); |
| 183 | } else { | 207 | } else { |
| 184 | const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer; | 208 | const GLuint base_binding = graphics_base_storage_bindings[stage]; |
| 185 | const GLuint binding = base_binding + binding_index; | 209 | const GLuint binding = base_binding + binding_index; |
| 186 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), | 210 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), |
| 187 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | 211 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); |
| @@ -213,4 +237,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, | |||
| 213 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | 237 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); |
| 214 | } | 238 | } |
| 215 | 239 | ||
| 240 | void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, | ||
| 241 | PixelFormat format) { | ||
| 242 | *texture_handles++ = buffer.View(offset, size, format); | ||
| 243 | } | ||
| 244 | |||
| 245 | void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) { | ||
| 246 | *image_handles++ = buffer.View(offset, size, format); | ||
| 247 | } | ||
| 248 | |||
| 216 | } // namespace OpenGL | 249 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index ddcce5e97..4986c65fd 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -32,6 +32,8 @@ public: | |||
| 32 | 32 | ||
| 33 | void MakeResident(GLenum access) noexcept; | 33 | void MakeResident(GLenum access) noexcept; |
| 34 | 34 | ||
| 35 | [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); | ||
| 36 | |||
| 35 | [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { | 37 | [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { |
| 36 | return address; | 38 | return address; |
| 37 | } | 39 | } |
| @@ -41,9 +43,17 @@ public: | |||
| 41 | } | 43 | } |
| 42 | 44 | ||
| 43 | private: | 45 | private: |
| 46 | struct BufferView { | ||
| 47 | u32 offset; | ||
| 48 | u32 size; | ||
| 49 | VideoCore::Surface::PixelFormat format; | ||
| 50 | OGLTexture texture; | ||
| 51 | }; | ||
| 52 | |||
| 44 | GLuint64EXT address = 0; | 53 | GLuint64EXT address = 0; |
| 45 | OGLBuffer buffer; | 54 | OGLBuffer buffer; |
| 46 | GLenum current_residency_access = GL_NONE; | 55 | GLenum current_residency_access = GL_NONE; |
| 56 | std::vector<BufferView> views; | ||
| 47 | }; | 57 | }; |
| 48 | 58 | ||
| 49 | class BufferCacheRuntime { | 59 | class BufferCacheRuntime { |
| @@ -75,13 +85,19 @@ public: | |||
| 75 | 85 | ||
| 76 | void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); | 86 | void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); |
| 77 | 87 | ||
| 88 | void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, | ||
| 89 | VideoCore::Surface::PixelFormat format); | ||
| 90 | |||
| 91 | void BindImageBuffer(Buffer& buffer, u32 offset, u32 size, | ||
| 92 | VideoCore::Surface::PixelFormat format); | ||
| 93 | |||
| 78 | void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { | 94 | void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { |
| 79 | if (use_assembly_shaders) { | 95 | if (use_assembly_shaders) { |
| 80 | const GLuint handle = fast_uniforms[stage][binding_index].handle; | 96 | const GLuint handle = fast_uniforms[stage][binding_index].handle; |
| 81 | const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); | 97 | const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); |
| 82 | glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); | 98 | glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); |
| 83 | } else { | 99 | } else { |
| 84 | const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | 100 | const GLuint base_binding = graphics_base_uniform_bindings[stage]; |
| 85 | const GLuint binding = base_binding + binding_index; | 101 | const GLuint binding = base_binding + binding_index; |
| 86 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, | 102 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, |
| 87 | fast_uniforms[stage][binding_index].handle, 0, | 103 | fast_uniforms[stage][binding_index].handle, 0, |
| @@ -103,7 +119,7 @@ public: | |||
| 103 | 119 | ||
| 104 | std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { | 120 | std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { |
| 105 | const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size)); | 121 | const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size)); |
| 106 | const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | 122 | const GLuint base_binding = graphics_base_uniform_bindings[stage]; |
| 107 | const GLuint binding = base_binding + binding_index; | 123 | const GLuint binding = base_binding + binding_index; |
| 108 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(), | 124 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(), |
| 109 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | 125 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); |
| @@ -118,6 +134,19 @@ public: | |||
| 118 | return has_fast_buffer_sub_data; | 134 | return has_fast_buffer_sub_data; |
| 119 | } | 135 | } |
| 120 | 136 | ||
| 137 | void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) { | ||
| 138 | graphics_base_uniform_bindings = bindings; | ||
| 139 | } | ||
| 140 | |||
| 141 | void SetBaseStorageBindings(const std::array<GLuint, 5>& bindings) { | ||
| 142 | graphics_base_storage_bindings = bindings; | ||
| 143 | } | ||
| 144 | |||
| 145 | void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) { | ||
| 146 | texture_handles = texture_handles_; | ||
| 147 | image_handles = image_handles_; | ||
| 148 | } | ||
| 149 | |||
| 121 | private: | 150 | private: |
| 122 | static constexpr std::array PABO_LUT{ | 151 | static constexpr std::array PABO_LUT{ |
| 123 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, | 152 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, |
| @@ -133,6 +162,11 @@ private: | |||
| 133 | 162 | ||
| 134 | u32 max_attributes = 0; | 163 | u32 max_attributes = 0; |
| 135 | 164 | ||
| 165 | std::array<GLuint, 5> graphics_base_uniform_bindings{}; | ||
| 166 | std::array<GLuint, 5> graphics_base_storage_bindings{}; | ||
| 167 | GLuint* texture_handles = nullptr; | ||
| 168 | GLuint* image_handles = nullptr; | ||
| 169 | |||
| 136 | std::optional<StreamBuffer> stream_buffer; | 170 | std::optional<StreamBuffer> stream_buffer; |
| 137 | 171 | ||
| 138 | std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, | 172 | std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, |
| @@ -155,8 +189,8 @@ struct BufferCacheParams { | |||
| 155 | static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; | 189 | static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; |
| 156 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; | 190 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; |
| 157 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; | 191 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; |
| 158 | static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = true; | ||
| 159 | static constexpr bool USE_MEMORY_MAPS = false; | 192 | static constexpr bool USE_MEMORY_MAPS = false; |
| 193 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; | ||
| 160 | }; | 194 | }; |
| 161 | 195 | ||
| 162 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | 196 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; |
diff --git a/src/video_core/renderer_opengl/gl_compute_program.cpp b/src/video_core/renderer_opengl/gl_compute_program.cpp new file mode 100644 index 000000000..d5ef65439 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_program.cpp | |||
| @@ -0,0 +1,178 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | |||
| 7 | #include "common/cityhash.h" | ||
| 8 | #include "video_core/renderer_opengl/gl_compute_program.h" | ||
| 9 | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||
| 10 | |||
| 11 | namespace OpenGL { | ||
| 12 | |||
| 13 | using Shader::ImageBufferDescriptor; | ||
| 14 | using Tegra::Texture::TexturePair; | ||
| 15 | using VideoCommon::ImageId; | ||
| 16 | |||
| 17 | constexpr u32 MAX_TEXTURES = 64; | ||
| 18 | constexpr u32 MAX_IMAGES = 16; | ||
| 19 | |||
| 20 | size_t ComputeProgramKey::Hash() const noexcept { | ||
| 21 | return static_cast<size_t>( | ||
| 22 | Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this)); | ||
| 23 | } | ||
| 24 | |||
| 25 | bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept { | ||
| 26 | return std::memcmp(this, &rhs, sizeof *this) == 0; | ||
| 27 | } | ||
| 28 | |||
| 29 | ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, | ||
| 30 | Tegra::MemoryManager& gpu_memory_, | ||
| 31 | Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 32 | ProgramManager& program_manager_, OGLProgram program_, | ||
| 33 | const Shader::Info& info_) | ||
| 34 | : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, | ||
| 35 | kepler_compute{kepler_compute_}, | ||
| 36 | program_manager{program_manager_}, program{std::move(program_)}, info{info_} { | ||
| 37 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 38 | num_texture_buffers += desc.count; | ||
| 39 | } | ||
| 40 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 41 | num_image_buffers += desc.count; | ||
| 42 | } | ||
| 43 | u32 num_textures = num_texture_buffers; | ||
| 44 | for (const auto& desc : info.texture_descriptors) { | ||
| 45 | num_textures += desc.count; | ||
| 46 | } | ||
| 47 | ASSERT(num_textures <= MAX_TEXTURES); | ||
| 48 | |||
| 49 | u32 num_images = num_image_buffers; | ||
| 50 | for (const auto& desc : info.image_descriptors) { | ||
| 51 | num_images += desc.count; | ||
| 52 | } | ||
| 53 | ASSERT(num_images <= MAX_IMAGES); | ||
| 54 | } | ||
| 55 | |||
| 56 | void ComputeProgram::Configure() { | ||
| 57 | buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); | ||
| 58 | buffer_cache.UnbindComputeStorageBuffers(); | ||
| 59 | size_t ssbo_index{}; | ||
| 60 | for (const auto& desc : info.storage_buffers_descriptors) { | ||
| 61 | ASSERT(desc.count == 1); | ||
| 62 | buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, | ||
| 63 | desc.is_written); | ||
| 64 | ++ssbo_index; | ||
| 65 | } | ||
| 66 | texture_cache.SynchronizeComputeDescriptors(); | ||
| 67 | |||
| 68 | std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; | ||
| 69 | boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; | ||
| 70 | std::array<GLuint, MAX_TEXTURES> samplers; | ||
| 71 | std::array<GLuint, MAX_TEXTURES> textures; | ||
| 72 | std::array<GLuint, MAX_IMAGES> images; | ||
| 73 | GLsizei sampler_binding{}; | ||
| 74 | GLsizei texture_binding{}; | ||
| 75 | GLsizei image_binding{}; | ||
| 76 | |||
| 77 | const auto& qmd{kepler_compute.launch_description}; | ||
| 78 | const auto& cbufs{qmd.const_buffer_config}; | ||
| 79 | const bool via_header_index{qmd.linked_tsc != 0}; | ||
| 80 | const auto read_handle{[&](const auto& desc, u32 index) { | ||
| 81 | ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); | ||
| 82 | const u32 index_offset{index << desc.size_shift}; | ||
| 83 | const u32 offset{desc.cbuf_offset + index_offset}; | ||
| 84 | const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; | ||
| 85 | if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> || | ||
| 86 | std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) { | ||
| 87 | if (desc.has_secondary) { | ||
| 88 | ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); | ||
| 89 | const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; | ||
| 90 | const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + | ||
| 91 | secondary_offset}; | ||
| 92 | const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; | ||
| 93 | const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; | ||
| 94 | return TexturePair(lhs_raw | rhs_raw, via_header_index); | ||
| 95 | } | ||
| 96 | } | ||
| 97 | return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); | ||
| 98 | }}; | ||
| 99 | const auto add_image{[&](const auto& desc) { | ||
| 100 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 101 | const auto handle{read_handle(desc, index)}; | ||
| 102 | image_view_indices.push_back(handle.first); | ||
| 103 | } | ||
| 104 | }}; | ||
| 105 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 106 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 107 | const auto handle{read_handle(desc, index)}; | ||
| 108 | image_view_indices.push_back(handle.first); | ||
| 109 | samplers[sampler_binding++] = 0; | ||
| 110 | } | ||
| 111 | } | ||
| 112 | std::ranges::for_each(info.image_buffer_descriptors, add_image); | ||
| 113 | for (const auto& desc : info.texture_descriptors) { | ||
| 114 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 115 | const auto handle{read_handle(desc, index)}; | ||
| 116 | image_view_indices.push_back(handle.first); | ||
| 117 | |||
| 118 | Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); | ||
| 119 | samplers[sampler_binding++] = sampler->Handle(); | ||
| 120 | } | ||
| 121 | } | ||
| 122 | std::ranges::for_each(info.image_descriptors, add_image); | ||
| 123 | |||
| 124 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||
| 125 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | ||
| 126 | |||
| 127 | buffer_cache.UnbindComputeTextureBuffers(); | ||
| 128 | size_t texbuf_index{}; | ||
| 129 | const auto add_buffer{[&](const auto& desc) { | ||
| 130 | constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; | ||
| 131 | for (u32 i = 0; i < desc.count; ++i) { | ||
| 132 | bool is_written{false}; | ||
| 133 | if constexpr (is_image) { | ||
| 134 | is_written = desc.is_written; | ||
| 135 | } | ||
| 136 | ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; | ||
| 137 | buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), | ||
| 138 | image_view.BufferSize(), image_view.format, | ||
| 139 | is_written, is_image); | ||
| 140 | ++texbuf_index; | ||
| 141 | } | ||
| 142 | }}; | ||
| 143 | std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); | ||
| 144 | std::ranges::for_each(info.image_buffer_descriptors, add_buffer); | ||
| 145 | |||
| 146 | buffer_cache.UpdateComputeBuffers(); | ||
| 147 | |||
| 148 | buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); | ||
| 149 | buffer_cache.BindHostComputeBuffers(); | ||
| 150 | |||
| 151 | const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; | ||
| 152 | texture_binding += num_texture_buffers; | ||
| 153 | image_binding += num_image_buffers; | ||
| 154 | |||
| 155 | for (const auto& desc : info.texture_descriptors) { | ||
| 156 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 157 | ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; | ||
| 158 | textures[texture_binding++] = image_view.Handle(desc.type); | ||
| 159 | } | ||
| 160 | } | ||
| 161 | for (const auto& desc : info.image_descriptors) { | ||
| 162 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 163 | ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; | ||
| 164 | images[image_binding++] = image_view.Handle(desc.type); | ||
| 165 | } | ||
| 166 | } | ||
| 167 | if (texture_binding != 0) { | ||
| 168 | ASSERT(texture_binding == sampler_binding); | ||
| 169 | glBindTextures(0, texture_binding, textures.data()); | ||
| 170 | glBindSamplers(0, sampler_binding, samplers.data()); | ||
| 171 | } | ||
| 172 | if (image_binding != 0) { | ||
| 173 | glBindImageTextures(0, image_binding, images.data()); | ||
| 174 | } | ||
| 175 | program_manager.BindProgram(program.handle); | ||
| 176 | } | ||
| 177 | |||
| 178 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_compute_program.h b/src/video_core/renderer_opengl/gl_compute_program.h new file mode 100644 index 000000000..64a75d44d --- /dev/null +++ b/src/video_core/renderer_opengl/gl_compute_program.h | |||
| @@ -0,0 +1,83 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <type_traits> | ||
| 9 | #include <utility> | ||
| 10 | |||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "shader_recompiler/shader_info.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||
| 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 15 | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||
| 16 | |||
| 17 | namespace Tegra { | ||
| 18 | class MemoryManager; | ||
| 19 | } | ||
| 20 | |||
| 21 | namespace Tegra::Engines { | ||
| 22 | class KeplerCompute; | ||
| 23 | } | ||
| 24 | |||
| 25 | namespace Shader { | ||
| 26 | struct Info; | ||
| 27 | } | ||
| 28 | |||
| 29 | namespace OpenGL { | ||
| 30 | |||
| 31 | class ProgramManager; | ||
| 32 | |||
| 33 | struct ComputeProgramKey { | ||
| 34 | u64 unique_hash; | ||
| 35 | u32 shared_memory_size; | ||
| 36 | std::array<u32, 3> workgroup_size; | ||
| 37 | |||
| 38 | size_t Hash() const noexcept; | ||
| 39 | |||
| 40 | bool operator==(const ComputeProgramKey&) const noexcept; | ||
| 41 | |||
| 42 | bool operator!=(const ComputeProgramKey& rhs) const noexcept { | ||
| 43 | return !operator==(rhs); | ||
| 44 | } | ||
| 45 | }; | ||
| 46 | static_assert(std::has_unique_object_representations_v<ComputeProgramKey>); | ||
| 47 | static_assert(std::is_trivially_copyable_v<ComputeProgramKey>); | ||
| 48 | static_assert(std::is_trivially_constructible_v<ComputeProgramKey>); | ||
| 49 | |||
| 50 | class ComputeProgram { | ||
| 51 | public: | ||
| 52 | explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, | ||
| 53 | Tegra::MemoryManager& gpu_memory_, | ||
| 54 | Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 55 | ProgramManager& program_manager_, OGLProgram program_, | ||
| 56 | const Shader::Info& info_); | ||
| 57 | |||
| 58 | void Configure(); | ||
| 59 | |||
| 60 | private: | ||
| 61 | TextureCache& texture_cache; | ||
| 62 | BufferCache& buffer_cache; | ||
| 63 | Tegra::MemoryManager& gpu_memory; | ||
| 64 | Tegra::Engines::KeplerCompute& kepler_compute; | ||
| 65 | ProgramManager& program_manager; | ||
| 66 | |||
| 67 | OGLProgram program; | ||
| 68 | Shader::Info info; | ||
| 69 | |||
| 70 | u32 num_texture_buffers{}; | ||
| 71 | u32 num_image_buffers{}; | ||
| 72 | }; | ||
| 73 | |||
| 74 | } // namespace OpenGL | ||
| 75 | |||
| 76 | namespace std { | ||
| 77 | template <> | ||
| 78 | struct hash<OpenGL::ComputeProgramKey> { | ||
| 79 | size_t operator()(const OpenGL::ComputeProgramKey& k) const noexcept { | ||
| 80 | return k.Hash(); | ||
| 81 | } | ||
| 82 | }; | ||
| 83 | } // namespace std | ||
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 3b00614e7..18bbc4c1f 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -22,34 +22,11 @@ | |||
| 22 | 22 | ||
| 23 | namespace OpenGL { | 23 | namespace OpenGL { |
| 24 | namespace { | 24 | namespace { |
| 25 | // One uniform block is reserved for emulation purposes | ||
| 26 | constexpr u32 ReservedUniformBlocks = 1; | ||
| 27 | |||
| 28 | constexpr u32 NumStages = 5; | ||
| 29 | |||
| 30 | constexpr std::array LIMIT_UBOS = { | 25 | constexpr std::array LIMIT_UBOS = { |
| 31 | GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, | 26 | GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, |
| 32 | GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, | 27 | GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, |
| 33 | GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS, | 28 | GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS, |
| 34 | }; | 29 | }; |
| 35 | constexpr std::array LIMIT_SSBOS = { | ||
| 36 | GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, | ||
| 37 | GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, | ||
| 38 | GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, | ||
| 39 | }; | ||
| 40 | constexpr std::array LIMIT_SAMPLERS = { | ||
| 41 | GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, | ||
| 42 | GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, | ||
| 43 | GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, | ||
| 44 | GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, | ||
| 45 | GL_MAX_TEXTURE_IMAGE_UNITS, | ||
| 46 | GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, | ||
| 47 | }; | ||
| 48 | constexpr std::array LIMIT_IMAGES = { | ||
| 49 | GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, | ||
| 50 | GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, | ||
| 51 | GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS, | ||
| 52 | }; | ||
| 53 | 30 | ||
| 54 | template <typename T> | 31 | template <typename T> |
| 55 | T GetInteger(GLenum pname) { | 32 | T GetInteger(GLenum pname) { |
| @@ -82,15 +59,6 @@ bool HasExtension(std::span<const std::string_view> extensions, std::string_view | |||
| 82 | return std::ranges::find(extensions, extension) != extensions.end(); | 59 | return std::ranges::find(extensions, extension) != extensions.end(); |
| 83 | } | 60 | } |
| 84 | 61 | ||
| 85 | u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { | ||
| 86 | ASSERT(num >= amount); | ||
| 87 | if (limit) { | ||
| 88 | amount = std::min(amount, GetInteger<u32>(*limit)); | ||
| 89 | } | ||
| 90 | num -= amount; | ||
| 91 | return std::exchange(base, base + amount); | ||
| 92 | } | ||
| 93 | |||
| 94 | std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept { | 62 | std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept { |
| 95 | std::array<u32, Tegra::Engines::MaxShaderTypes> max; | 63 | std::array<u32, Tegra::Engines::MaxShaderTypes> max; |
| 96 | std::ranges::transform(LIMIT_UBOS, max.begin(), | 64 | std::ranges::transform(LIMIT_UBOS, max.begin(), |
| @@ -98,62 +66,6 @@ std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcep | |||
| 98 | return max; | 66 | return max; |
| 99 | } | 67 | } |
| 100 | 68 | ||
| 101 | std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept { | ||
| 102 | std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings; | ||
| 103 | |||
| 104 | static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4}; | ||
| 105 | const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS); | ||
| 106 | const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS); | ||
| 107 | const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS); | ||
| 108 | |||
| 109 | u32 num_ubos = total_ubos - ReservedUniformBlocks; | ||
| 110 | u32 num_ssbos = total_ssbos; | ||
| 111 | u32 num_samplers = total_samplers; | ||
| 112 | |||
| 113 | u32 base_ubo = ReservedUniformBlocks; | ||
| 114 | u32 base_ssbo = 0; | ||
| 115 | u32 base_samplers = 0; | ||
| 116 | |||
| 117 | for (std::size_t i = 0; i < NumStages; ++i) { | ||
| 118 | const std::size_t stage = stage_swizzle[i]; | ||
| 119 | bindings[stage] = { | ||
| 120 | Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]), | ||
| 121 | Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]), | ||
| 122 | Extract(base_samplers, num_samplers, total_samplers / NumStages, | ||
| 123 | LIMIT_SAMPLERS[stage])}; | ||
| 124 | } | ||
| 125 | |||
| 126 | u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS); | ||
| 127 | u32 base_images = 0; | ||
| 128 | |||
| 129 | // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8. | ||
| 130 | // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the | ||
| 131 | // fragment stage, and at least 1 for the rest of the stages. | ||
| 132 | // So far games are observed to use 1 image binding on vertex and 4 on fragment stages. | ||
| 133 | |||
| 134 | // Reserve at least 4 image bindings on the fragment stage. | ||
| 135 | bindings[4].image = | ||
| 136 | Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]); | ||
| 137 | |||
| 138 | // This is guaranteed to be at least 1. | ||
| 139 | const u32 total_extracted_images = num_images / (NumStages - 1); | ||
| 140 | |||
| 141 | // Reserve the other image bindings. | ||
| 142 | for (std::size_t i = 0; i < NumStages; ++i) { | ||
| 143 | const std::size_t stage = stage_swizzle[i]; | ||
| 144 | if (stage == 4) { | ||
| 145 | continue; | ||
| 146 | } | ||
| 147 | bindings[stage].image = | ||
| 148 | Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]); | ||
| 149 | } | ||
| 150 | |||
| 151 | // Compute doesn't care about any of this. | ||
| 152 | bindings[5] = {0, 0, 0, 0}; | ||
| 153 | |||
| 154 | return bindings; | ||
| 155 | } | ||
| 156 | |||
| 157 | bool IsASTCSupported() { | 69 | bool IsASTCSupported() { |
| 158 | static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; | 70 | static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; |
| 159 | static constexpr std::array formats = { | 71 | static constexpr std::array formats = { |
| @@ -225,7 +137,6 @@ Device::Device() { | |||
| 225 | } | 137 | } |
| 226 | 138 | ||
| 227 | max_uniform_buffers = BuildMaxUniformBuffers(); | 139 | max_uniform_buffers = BuildMaxUniformBuffers(); |
| 228 | base_bindings = BuildBaseBindings(); | ||
| 229 | uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | 140 | uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); |
| 230 | shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | 141 | shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); |
| 231 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | 142 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 2c2b13767..152a3acd3 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -12,13 +12,6 @@ namespace OpenGL { | |||
| 12 | 12 | ||
| 13 | class Device { | 13 | class Device { |
| 14 | public: | 14 | public: |
| 15 | struct BaseBindings { | ||
| 16 | u32 uniform_buffer{}; | ||
| 17 | u32 shader_storage_buffer{}; | ||
| 18 | u32 sampler{}; | ||
| 19 | u32 image{}; | ||
| 20 | }; | ||
| 21 | |||
| 22 | explicit Device(); | 15 | explicit Device(); |
| 23 | explicit Device(std::nullptr_t); | 16 | explicit Device(std::nullptr_t); |
| 24 | 17 | ||
| @@ -28,14 +21,6 @@ public: | |||
| 28 | return max_uniform_buffers[static_cast<std::size_t>(shader_type)]; | 21 | return max_uniform_buffers[static_cast<std::size_t>(shader_type)]; |
| 29 | } | 22 | } |
| 30 | 23 | ||
| 31 | const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { | ||
| 32 | return base_bindings[stage_index]; | ||
| 33 | } | ||
| 34 | |||
| 35 | const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept { | ||
| 36 | return GetBaseBindings(static_cast<std::size_t>(shader_type)); | ||
| 37 | } | ||
| 38 | |||
| 39 | size_t GetUniformBufferAlignment() const { | 24 | size_t GetUniformBufferAlignment() const { |
| 40 | return uniform_buffer_alignment; | 25 | return uniform_buffer_alignment; |
| 41 | } | 26 | } |
| @@ -134,7 +119,6 @@ private: | |||
| 134 | 119 | ||
| 135 | std::string vendor_name; | 120 | std::string vendor_name; |
| 136 | std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; | 121 | std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; |
| 137 | std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; | ||
| 138 | size_t uniform_buffer_alignment{}; | 122 | size_t uniform_buffer_alignment{}; |
| 139 | size_t shader_storage_alignment{}; | 123 | size_t shader_storage_alignment{}; |
| 140 | u32 max_vertex_attributes{}; | 124 | u32 max_vertex_attributes{}; |
diff --git a/src/video_core/renderer_opengl/gl_graphics_program.cpp b/src/video_core/renderer_opengl/gl_graphics_program.cpp new file mode 100644 index 000000000..fd0958719 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_program.cpp | |||
| @@ -0,0 +1,296 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | |||
| 7 | #include "common/cityhash.h" | ||
| 8 | #include "shader_recompiler/shader_info.h" | ||
| 9 | #include "video_core/renderer_opengl/gl_graphics_program.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_state_tracker.h" | ||
| 12 | #include "video_core/texture_cache/texture_cache.h" | ||
| 13 | |||
| 14 | namespace OpenGL { | ||
| 15 | |||
| 16 | using Shader::ImageBufferDescriptor; | ||
| 17 | using Tegra::Texture::TexturePair; | ||
| 18 | using VideoCommon::ImageId; | ||
| 19 | |||
| 20 | constexpr u32 MAX_TEXTURES = 64; | ||
| 21 | constexpr u32 MAX_IMAGES = 8; | ||
| 22 | |||
| 23 | size_t GraphicsProgramKey::Hash() const noexcept { | ||
| 24 | return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size())); | ||
| 25 | } | ||
| 26 | |||
| 27 | bool GraphicsProgramKey::operator==(const GraphicsProgramKey& rhs) const noexcept { | ||
| 28 | return std::memcmp(this, &rhs, Size()) == 0; | ||
| 29 | } | ||
| 30 | |||
| 31 | GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, | ||
| 32 | Tegra::MemoryManager& gpu_memory_, | ||
| 33 | Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 34 | ProgramManager& program_manager_, StateTracker& state_tracker_, | ||
| 35 | OGLProgram program_, | ||
| 36 | const std::array<const Shader::Info*, 5>& infos) | ||
| 37 | : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, | ||
| 38 | gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, | ||
| 39 | state_tracker{state_tracker_}, program{std::move(program_)} { | ||
| 40 | std::ranges::transform(infos, stage_infos.begin(), | ||
| 41 | [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); | ||
| 42 | |||
| 43 | u32 num_textures{}; | ||
| 44 | u32 num_images{}; | ||
| 45 | for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) { | ||
| 46 | const auto& info{stage_infos[stage]}; | ||
| 47 | base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; | ||
| 48 | base_storage_bindings[stage + 1] = base_storage_bindings[stage]; | ||
| 49 | for (const auto& desc : info.constant_buffer_descriptors) { | ||
| 50 | base_uniform_bindings[stage + 1] += desc.count; | ||
| 51 | } | ||
| 52 | for (const auto& desc : info.storage_buffers_descriptors) { | ||
| 53 | base_storage_bindings[stage + 1] += desc.count; | ||
| 54 | } | ||
| 55 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 56 | num_texture_buffers[stage] += desc.count; | ||
| 57 | num_textures += desc.count; | ||
| 58 | } | ||
| 59 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 60 | num_image_buffers[stage] += desc.count; | ||
| 61 | num_images += desc.count; | ||
| 62 | } | ||
| 63 | for (const auto& desc : info.texture_descriptors) { | ||
| 64 | num_textures += desc.count; | ||
| 65 | } | ||
| 66 | for (const auto& desc : info.image_descriptors) { | ||
| 67 | num_images += desc.count; | ||
| 68 | } | ||
| 69 | } | ||
| 70 | ASSERT(num_textures <= MAX_TEXTURES); | ||
| 71 | ASSERT(num_images <= MAX_IMAGES); | ||
| 72 | } | ||
| 73 | |||
| 74 | struct Spec { | ||
| 75 | static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true}; | ||
| 76 | static constexpr bool has_storage_buffers = true; | ||
| 77 | static constexpr bool has_texture_buffers = true; | ||
| 78 | static constexpr bool has_image_buffers = true; | ||
| 79 | static constexpr bool has_images = true; | ||
| 80 | }; | ||
| 81 | |||
| 82 | void GraphicsProgram::Configure(bool is_indexed) { | ||
| 83 | std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; | ||
| 84 | std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; | ||
| 85 | std::array<GLuint, MAX_TEXTURES> samplers; | ||
| 86 | size_t image_view_index{}; | ||
| 87 | GLsizei sampler_binding{}; | ||
| 88 | |||
| 89 | texture_cache.SynchronizeGraphicsDescriptors(); | ||
| 90 | |||
| 91 | buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); | ||
| 92 | buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); | ||
| 93 | |||
| 94 | const auto& regs{maxwell3d.regs}; | ||
| 95 | const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; | ||
| 96 | const auto config_stage{[&](size_t stage) { | ||
| 97 | const Shader::Info& info{stage_infos[stage]}; | ||
| 98 | buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); | ||
| 99 | buffer_cache.UnbindGraphicsStorageBuffers(stage); | ||
| 100 | if constexpr (Spec::has_storage_buffers) { | ||
| 101 | size_t ssbo_index{}; | ||
| 102 | for (const auto& desc : info.storage_buffers_descriptors) { | ||
| 103 | ASSERT(desc.count == 1); | ||
| 104 | buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, | ||
| 105 | desc.cbuf_offset, desc.is_written); | ||
| 106 | ++ssbo_index; | ||
| 107 | } | ||
| 108 | } | ||
| 109 | const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; | ||
| 110 | const auto read_handle{[&](const auto& desc, u32 index) { | ||
| 111 | ASSERT(cbufs[desc.cbuf_index].enabled); | ||
| 112 | const u32 index_offset{index << desc.size_shift}; | ||
| 113 | const u32 offset{desc.cbuf_offset + index_offset}; | ||
| 114 | const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; | ||
| 115 | if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> || | ||
| 116 | std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) { | ||
| 117 | if (desc.has_secondary) { | ||
| 118 | ASSERT(cbufs[desc.secondary_cbuf_index].enabled); | ||
| 119 | const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; | ||
| 120 | const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + | ||
| 121 | second_offset}; | ||
| 122 | const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; | ||
| 123 | const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; | ||
| 124 | const u32 raw{lhs_raw | rhs_raw}; | ||
| 125 | return TexturePair(raw, via_header_index); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); | ||
| 129 | }}; | ||
| 130 | const auto add_image{[&](const auto& desc) { | ||
| 131 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 132 | const auto handle{read_handle(desc, index)}; | ||
| 133 | image_view_indices[image_view_index++] = handle.first; | ||
| 134 | } | ||
| 135 | }}; | ||
| 136 | if constexpr (Spec::has_texture_buffers) { | ||
| 137 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 138 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 139 | const auto handle{read_handle(desc, index)}; | ||
| 140 | image_view_indices[image_view_index++] = handle.first; | ||
| 141 | samplers[sampler_binding++] = 0; | ||
| 142 | } | ||
| 143 | } | ||
| 144 | } | ||
| 145 | if constexpr (Spec::has_image_buffers) { | ||
| 146 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 147 | add_image(desc); | ||
| 148 | } | ||
| 149 | } | ||
| 150 | for (const auto& desc : info.texture_descriptors) { | ||
| 151 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 152 | const auto handle{read_handle(desc, index)}; | ||
| 153 | image_view_indices[image_view_index++] = handle.first; | ||
| 154 | |||
| 155 | Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; | ||
| 156 | samplers[sampler_binding++] = sampler->Handle(); | ||
| 157 | } | ||
| 158 | } | ||
| 159 | if constexpr (Spec::has_images) { | ||
| 160 | for (const auto& desc : info.image_descriptors) { | ||
| 161 | add_image(desc); | ||
| 162 | } | ||
| 163 | } | ||
| 164 | }}; | ||
| 165 | if constexpr (Spec::enabled_stages[0]) { | ||
| 166 | config_stage(0); | ||
| 167 | } | ||
| 168 | if constexpr (Spec::enabled_stages[1]) { | ||
| 169 | config_stage(1); | ||
| 170 | } | ||
| 171 | if constexpr (Spec::enabled_stages[2]) { | ||
| 172 | config_stage(2); | ||
| 173 | } | ||
| 174 | if constexpr (Spec::enabled_stages[3]) { | ||
| 175 | config_stage(3); | ||
| 176 | } | ||
| 177 | if constexpr (Spec::enabled_stages[4]) { | ||
| 178 | config_stage(4); | ||
| 179 | } | ||
| 180 | const std::span indices_span(image_view_indices.data(), image_view_index); | ||
| 181 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | ||
| 182 | |||
| 183 | ImageId* texture_buffer_index{image_view_ids.data()}; | ||
| 184 | const auto bind_stage_info{[&](size_t stage) { | ||
| 185 | size_t index{}; | ||
| 186 | const auto add_buffer{[&](const auto& desc) { | ||
| 187 | constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; | ||
| 188 | for (u32 i = 0; i < desc.count; ++i) { | ||
| 189 | bool is_written{false}; | ||
| 190 | if constexpr (is_image) { | ||
| 191 | is_written = desc.is_written; | ||
| 192 | } | ||
| 193 | ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; | ||
| 194 | buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), | ||
| 195 | image_view.BufferSize(), image_view.format, | ||
| 196 | is_written, is_image); | ||
| 197 | ++index; | ||
| 198 | ++texture_buffer_index; | ||
| 199 | } | ||
| 200 | }}; | ||
| 201 | const Shader::Info& info{stage_infos[stage]}; | ||
| 202 | buffer_cache.UnbindGraphicsTextureBuffers(stage); | ||
| 203 | |||
| 204 | if constexpr (Spec::has_texture_buffers) { | ||
| 205 | for (const auto& desc : info.texture_buffer_descriptors) { | ||
| 206 | add_buffer(desc); | ||
| 207 | } | ||
| 208 | } | ||
| 209 | if constexpr (Spec::has_image_buffers) { | ||
| 210 | for (const auto& desc : info.image_buffer_descriptors) { | ||
| 211 | add_buffer(desc); | ||
| 212 | } | ||
| 213 | } | ||
| 214 | for (const auto& desc : info.texture_descriptors) { | ||
| 215 | texture_buffer_index += desc.count; | ||
| 216 | } | ||
| 217 | if constexpr (Spec::has_images) { | ||
| 218 | for (const auto& desc : info.image_descriptors) { | ||
| 219 | texture_buffer_index += desc.count; | ||
| 220 | } | ||
| 221 | } | ||
| 222 | }}; | ||
| 223 | if constexpr (Spec::enabled_stages[0]) { | ||
| 224 | bind_stage_info(0); | ||
| 225 | } | ||
| 226 | if constexpr (Spec::enabled_stages[1]) { | ||
| 227 | bind_stage_info(1); | ||
| 228 | } | ||
| 229 | if constexpr (Spec::enabled_stages[2]) { | ||
| 230 | bind_stage_info(2); | ||
| 231 | } | ||
| 232 | if constexpr (Spec::enabled_stages[3]) { | ||
| 233 | bind_stage_info(3); | ||
| 234 | } | ||
| 235 | if constexpr (Spec::enabled_stages[4]) { | ||
| 236 | bind_stage_info(4); | ||
| 237 | } | ||
| 238 | buffer_cache.UpdateGraphicsBuffers(is_indexed); | ||
| 239 | buffer_cache.BindHostGeometryBuffers(is_indexed); | ||
| 240 | |||
| 241 | const ImageId* views_it{image_view_ids.data()}; | ||
| 242 | GLsizei texture_binding = 0; | ||
| 243 | GLsizei image_binding = 0; | ||
| 244 | std::array<GLuint, MAX_TEXTURES> textures; | ||
| 245 | std::array<GLuint, MAX_IMAGES> images; | ||
| 246 | const auto prepare_stage{[&](size_t stage) { | ||
| 247 | buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); | ||
| 248 | buffer_cache.BindHostStageBuffers(stage); | ||
| 249 | |||
| 250 | texture_binding += num_texture_buffers[stage]; | ||
| 251 | image_binding += num_image_buffers[stage]; | ||
| 252 | |||
| 253 | const auto& info{stage_infos[stage]}; | ||
| 254 | for (const auto& desc : info.texture_descriptors) { | ||
| 255 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 256 | ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; | ||
| 257 | textures[texture_binding++] = image_view.Handle(desc.type); | ||
| 258 | } | ||
| 259 | } | ||
| 260 | for (const auto& desc : info.image_descriptors) { | ||
| 261 | for (u32 index = 0; index < desc.count; ++index) { | ||
| 262 | ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; | ||
| 263 | images[image_binding++] = image_view.Handle(desc.type); | ||
| 264 | } | ||
| 265 | } | ||
| 266 | }}; | ||
| 267 | if constexpr (Spec::enabled_stages[0]) { | ||
| 268 | prepare_stage(0); | ||
| 269 | } | ||
| 270 | if constexpr (Spec::enabled_stages[1]) { | ||
| 271 | prepare_stage(1); | ||
| 272 | } | ||
| 273 | if constexpr (Spec::enabled_stages[2]) { | ||
| 274 | prepare_stage(2); | ||
| 275 | } | ||
| 276 | if constexpr (Spec::enabled_stages[3]) { | ||
| 277 | prepare_stage(3); | ||
| 278 | } | ||
| 279 | if constexpr (Spec::enabled_stages[4]) { | ||
| 280 | prepare_stage(4); | ||
| 281 | } | ||
| 282 | if (texture_binding != 0) { | ||
| 283 | ASSERT(texture_binding == sampler_binding); | ||
| 284 | glBindTextures(0, texture_binding, textures.data()); | ||
| 285 | glBindSamplers(0, sampler_binding, samplers.data()); | ||
| 286 | } | ||
| 287 | if (image_binding != 0) { | ||
| 288 | glBindImageTextures(0, image_binding, images.data()); | ||
| 289 | } | ||
| 290 | texture_cache.UpdateRenderTargets(false); | ||
| 291 | |||
| 292 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | ||
| 293 | program_manager.BindProgram(program.handle); | ||
| 294 | } | ||
| 295 | |||
| 296 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_graphics_program.h b/src/video_core/renderer_opengl/gl_graphics_program.h new file mode 100644 index 000000000..5adf3f41e --- /dev/null +++ b/src/video_core/renderer_opengl/gl_graphics_program.h | |||
| @@ -0,0 +1,105 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <type_traits> | ||
| 9 | #include <utility> | ||
| 10 | |||
| 11 | #include "common/bit_field.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "shader_recompiler/shader_info.h" | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | #include "video_core/memory_manager.h" | ||
| 16 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||
| 17 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||
| 19 | |||
| 20 | namespace OpenGL { | ||
| 21 | |||
| 22 | class ProgramManager; | ||
| 23 | |||
| 24 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 25 | |||
| 26 | struct GraphicsProgramKey { | ||
| 27 | struct TransformFeedbackState { | ||
| 28 | struct Layout { | ||
| 29 | u32 stream; | ||
| 30 | u32 varying_count; | ||
| 31 | u32 stride; | ||
| 32 | }; | ||
| 33 | std::array<Layout, Maxwell::NumTransformFeedbackBuffers> layouts; | ||
| 34 | std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> varyings; | ||
| 35 | }; | ||
| 36 | |||
| 37 | std::array<u64, 6> unique_hashes; | ||
| 38 | union { | ||
| 39 | u32 raw; | ||
| 40 | BitField<0, 1, u32> xfb_enabled; | ||
| 41 | BitField<1, 1, u32> early_z; | ||
| 42 | BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; | ||
| 43 | BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive; | ||
| 44 | BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing; | ||
| 45 | BitField<10, 1, u32> tessellation_clockwise; | ||
| 46 | }; | ||
| 47 | std::array<u32, 3> padding; | ||
| 48 | TransformFeedbackState xfb_state; | ||
| 49 | |||
| 50 | size_t Hash() const noexcept; | ||
| 51 | |||
| 52 | bool operator==(const GraphicsProgramKey&) const noexcept; | ||
| 53 | |||
| 54 | bool operator!=(const GraphicsProgramKey& rhs) const noexcept { | ||
| 55 | return !operator==(rhs); | ||
| 56 | } | ||
| 57 | |||
| 58 | [[nodiscard]] size_t Size() const noexcept { | ||
| 59 | if (xfb_enabled != 0) { | ||
| 60 | return sizeof(GraphicsProgramKey); | ||
| 61 | } else { | ||
| 62 | return offsetof(GraphicsProgramKey, padding); | ||
| 63 | } | ||
| 64 | } | ||
| 65 | }; | ||
| 66 | static_assert(std::has_unique_object_representations_v<GraphicsProgramKey>); | ||
| 67 | static_assert(std::is_trivially_copyable_v<GraphicsProgramKey>); | ||
| 68 | static_assert(std::is_trivially_constructible_v<GraphicsProgramKey>); | ||
| 69 | |||
| 70 | class GraphicsProgram { | ||
| 71 | public: | ||
| 72 | explicit GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, | ||
| 73 | Tegra::MemoryManager& gpu_memory_, | ||
| 74 | Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 75 | ProgramManager& program_manager_, StateTracker& state_tracker_, | ||
| 76 | OGLProgram program_, const std::array<const Shader::Info*, 5>& infos); | ||
| 77 | |||
| 78 | void Configure(bool is_indexed); | ||
| 79 | |||
| 80 | private: | ||
| 81 | TextureCache& texture_cache; | ||
| 82 | BufferCache& buffer_cache; | ||
| 83 | Tegra::MemoryManager& gpu_memory; | ||
| 84 | Tegra::Engines::Maxwell3D& maxwell3d; | ||
| 85 | ProgramManager& program_manager; | ||
| 86 | StateTracker& state_tracker; | ||
| 87 | |||
| 88 | OGLProgram program; | ||
| 89 | std::array<Shader::Info, 5> stage_infos{}; | ||
| 90 | std::array<u32, 5> base_uniform_bindings{}; | ||
| 91 | std::array<u32, 5> base_storage_bindings{}; | ||
| 92 | std::array<u32, 5> num_texture_buffers{}; | ||
| 93 | std::array<u32, 5> num_image_buffers{}; | ||
| 94 | }; | ||
| 95 | |||
| 96 | } // namespace OpenGL | ||
| 97 | |||
| 98 | namespace std { | ||
| 99 | template <> | ||
| 100 | struct hash<OpenGL::GraphicsProgramKey> { | ||
| 101 | size_t operator()(const OpenGL::GraphicsProgramKey& k) const noexcept { | ||
| 102 | return k.Hash(); | ||
| 103 | } | ||
| 104 | }; | ||
| 105 | } // namespace std | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index dd1937863..e527b76ba 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -98,7 +98,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 98 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | 98 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), |
| 99 | buffer_cache_runtime(device), | 99 | buffer_cache_runtime(device), |
| 100 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | 100 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), |
| 101 | shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), | 101 | shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache, |
| 102 | buffer_cache, program_manager, state_tracker), | ||
| 102 | query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), | 103 | query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), |
| 103 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} | 104 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} |
| 104 | 105 | ||
| @@ -246,12 +247,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 246 | 247 | ||
| 247 | SyncState(); | 248 | SyncState(); |
| 248 | 249 | ||
| 249 | // Setup shaders and their used resources. | 250 | GraphicsProgram* const program{shader_cache.CurrentGraphicsProgram()}; |
| 250 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 251 | 251 | ||
| 252 | texture_cache.UpdateRenderTargets(false); | 252 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 253 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | 253 | program->Configure(is_indexed); |
| 254 | program_manager.BindGraphicsPipeline(); | ||
| 255 | 254 | ||
| 256 | const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); | 255 | const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); |
| 257 | BeginTransformFeedback(primitive_mode); | 256 | BeginTransformFeedback(primitive_mode); |
| @@ -293,7 +292,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 293 | num_instances, base_instance); | 292 | num_instances, base_instance); |
| 294 | } | 293 | } |
| 295 | } | 294 | } |
| 296 | |||
| 297 | EndTransformFeedback(); | 295 | EndTransformFeedback(); |
| 298 | 296 | ||
| 299 | ++num_queued_commands; | 297 | ++num_queued_commands; |
| @@ -302,7 +300,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 302 | } | 300 | } |
| 303 | 301 | ||
| 304 | void RasterizerOpenGL::DispatchCompute() { | 302 | void RasterizerOpenGL::DispatchCompute() { |
| 305 | UNREACHABLE_MSG("Not implemented"); | 303 | ComputeProgram* const program{shader_cache.CurrentComputeProgram()}; |
| 304 | if (!program) { | ||
| 305 | return; | ||
| 306 | } | ||
| 307 | program->Configure(); | ||
| 308 | const auto& qmd{kepler_compute.launch_description}; | ||
| 309 | glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); | ||
| 310 | ++num_queued_commands; | ||
| 306 | } | 311 | } |
| 307 | 312 | ||
| 308 | void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { | 313 | void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { |
| @@ -515,7 +520,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 515 | // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); | 520 | // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); |
| 516 | // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); | 521 | // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); |
| 517 | 522 | ||
| 518 | screen_info.display_texture = image_view->Handle(ImageViewType::e2D); | 523 | screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D); |
| 519 | screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); | 524 | screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); |
| 520 | return true; | 525 | return true; |
| 521 | } | 526 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c3e490b40..c9ca1f005 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -16,6 +16,11 @@ | |||
| 16 | #include "common/scope_exit.h" | 16 | #include "common/scope_exit.h" |
| 17 | #include "core/core.h" | 17 | #include "core/core.h" |
| 18 | #include "core/frontend/emu_window.h" | 18 | #include "core/frontend/emu_window.h" |
| 19 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 20 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 21 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 22 | #include "shader_recompiler/frontend/maxwell/program.h" | ||
| 23 | #include "shader_recompiler/profile.h" | ||
| 19 | #include "video_core/engines/kepler_compute.h" | 24 | #include "video_core/engines/kepler_compute.h" |
| 20 | #include "video_core/engines/maxwell_3d.h" | 25 | #include "video_core/engines/maxwell_3d.h" |
| 21 | #include "video_core/engines/shader_type.h" | 26 | #include "video_core/engines/shader_type.h" |
| @@ -25,17 +30,281 @@ | |||
| 25 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 26 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 31 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 27 | #include "video_core/shader_cache.h" | 32 | #include "video_core/shader_cache.h" |
| 33 | #include "video_core/shader_environment.h" | ||
| 28 | #include "video_core/shader_notify.h" | 34 | #include "video_core/shader_notify.h" |
| 29 | 35 | ||
| 30 | namespace OpenGL { | 36 | namespace OpenGL { |
| 37 | namespace { | ||
| 38 | // FIXME: Move this somewhere else | ||
| 39 | const Shader::Profile profile{ | ||
| 40 | .supported_spirv = 0x00010000, | ||
| 41 | |||
| 42 | .unified_descriptor_binding = false, | ||
| 43 | .support_descriptor_aliasing = false, | ||
| 44 | .support_int8 = false, | ||
| 45 | .support_int16 = false, | ||
| 46 | .support_vertex_instance_id = true, | ||
| 47 | .support_float_controls = false, | ||
| 48 | .support_separate_denorm_behavior = false, | ||
| 49 | .support_separate_rounding_mode = false, | ||
| 50 | .support_fp16_denorm_preserve = false, | ||
| 51 | .support_fp32_denorm_preserve = false, | ||
| 52 | .support_fp16_denorm_flush = false, | ||
| 53 | .support_fp32_denorm_flush = false, | ||
| 54 | .support_fp16_signed_zero_nan_preserve = false, | ||
| 55 | .support_fp32_signed_zero_nan_preserve = false, | ||
| 56 | .support_fp64_signed_zero_nan_preserve = false, | ||
| 57 | .support_explicit_workgroup_layout = false, | ||
| 58 | .support_vote = true, | ||
| 59 | .support_viewport_index_layer_non_geometry = true, | ||
| 60 | .support_viewport_mask = true, | ||
| 61 | .support_typeless_image_loads = true, | ||
| 62 | .support_demote_to_helper_invocation = false, | ||
| 63 | .warp_size_potentially_larger_than_guest = true, | ||
| 64 | .support_int64_atomics = false, | ||
| 65 | .lower_left_origin_mode = true, | ||
| 66 | |||
| 67 | .has_broken_spirv_clamp = true, | ||
| 68 | .has_broken_unsigned_image_offsets = true, | ||
| 69 | .has_broken_signed_operations = true, | ||
| 70 | .ignore_nan_fp_comparisons = true, | ||
| 71 | |||
| 72 | .generic_input_types = {}, | ||
| 73 | .convert_depth_mode = false, | ||
| 74 | .force_early_z = false, | ||
| 75 | |||
| 76 | .tess_primitive = {}, | ||
| 77 | .tess_spacing = {}, | ||
| 78 | .tess_clockwise = false, | ||
| 79 | |||
| 80 | .input_topology = Shader::InputTopology::Triangles, | ||
| 81 | |||
| 82 | .fixed_state_point_size = std::nullopt, | ||
| 83 | |||
| 84 | .alpha_test_func = Shader::CompareFunction::Always, | ||
| 85 | .alpha_test_reference = 0.0f, | ||
| 86 | |||
| 87 | .y_negate = false, | ||
| 88 | |||
| 89 | .xfb_varyings = {}, | ||
| 90 | }; | ||
| 91 | |||
| 92 | using Shader::Backend::SPIRV::EmitSPIRV; | ||
| 93 | using Shader::Maxwell::TranslateProgram; | ||
| 94 | using VideoCommon::ComputeEnvironment; | ||
| 95 | using VideoCommon::GraphicsEnvironment; | ||
| 96 | |||
| 97 | template <typename Container> | ||
| 98 | auto MakeSpan(Container& container) { | ||
| 99 | return std::span(container.data(), container.size()); | ||
| 100 | } | ||
| 101 | |||
| 102 | void AddShader(GLenum stage, GLuint program, std::span<const u32> code) { | ||
| 103 | OGLShader shader; | ||
| 104 | shader.handle = glCreateShader(stage); | ||
| 105 | |||
| 106 | glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), | ||
| 107 | static_cast<GLsizei>(code.size_bytes())); | ||
| 108 | glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); | ||
| 109 | glAttachShader(program, shader.handle); | ||
| 110 | if (!Settings::values.renderer_debug) { | ||
| 111 | return; | ||
| 112 | } | ||
| 113 | GLint shader_status{}; | ||
| 114 | glGetShaderiv(shader.handle, GL_COMPILE_STATUS, &shader_status); | ||
| 115 | if (shader_status == GL_FALSE) { | ||
| 116 | LOG_ERROR(Render_OpenGL, "Failed to build shader"); | ||
| 117 | } | ||
| 118 | GLint log_length{}; | ||
| 119 | glGetShaderiv(shader.handle, GL_INFO_LOG_LENGTH, &log_length); | ||
| 120 | if (log_length == 0) { | ||
| 121 | return; | ||
| 122 | } | ||
| 123 | std::string log(log_length, 0); | ||
| 124 | glGetShaderInfoLog(shader.handle, log_length, nullptr, log.data()); | ||
| 125 | if (shader_status == GL_FALSE) { | ||
| 126 | LOG_ERROR(Render_OpenGL, "{}", log); | ||
| 127 | } else { | ||
| 128 | LOG_WARNING(Render_OpenGL, "{}", log); | ||
| 129 | } | ||
| 130 | } | ||
| 131 | |||
| 132 | void LinkProgram(GLuint program) { | ||
| 133 | glLinkProgram(program); | ||
| 134 | if (!Settings::values.renderer_debug) { | ||
| 135 | return; | ||
| 136 | } | ||
| 137 | GLint link_status{}; | ||
| 138 | glGetProgramiv(program, GL_LINK_STATUS, &link_status); | ||
| 139 | |||
| 140 | GLint log_length{}; | ||
| 141 | glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); | ||
| 142 | if (log_length == 0) { | ||
| 143 | return; | ||
| 144 | } | ||
| 145 | std::string log(log_length, 0); | ||
| 146 | glGetProgramInfoLog(program, log_length, nullptr, log.data()); | ||
| 147 | if (link_status == GL_FALSE) { | ||
| 148 | LOG_ERROR(Render_OpenGL, "{}", log); | ||
| 149 | } else { | ||
| 150 | LOG_WARNING(Render_OpenGL, "{}", log); | ||
| 151 | } | ||
| 152 | } | ||
| 153 | |||
| 154 | GLenum Stage(size_t stage_index) { | ||
| 155 | switch (stage_index) { | ||
| 156 | case 0: | ||
| 157 | return GL_VERTEX_SHADER; | ||
| 158 | case 1: | ||
| 159 | return GL_TESS_CONTROL_SHADER; | ||
| 160 | case 2: | ||
| 161 | return GL_TESS_EVALUATION_SHADER; | ||
| 162 | case 3: | ||
| 163 | return GL_GEOMETRY_SHADER; | ||
| 164 | case 4: | ||
| 165 | return GL_FRAGMENT_SHADER; | ||
| 166 | } | ||
| 167 | UNREACHABLE_MSG("{}", stage_index); | ||
| 168 | return GL_NONE; | ||
| 169 | } | ||
| 170 | } // Anonymous namespace | ||
| 31 | 171 | ||
| 32 | ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, | 172 | ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, |
| 33 | Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, | 173 | Tegra::Engines::Maxwell3D& maxwell3d_, |
| 34 | Tegra::Engines::KeplerCompute& kepler_compute_, | 174 | Tegra::Engines::KeplerCompute& kepler_compute_, |
| 35 | Tegra::MemoryManager& gpu_memory_, const Device& device_) | 175 | Tegra::MemoryManager& gpu_memory_, const Device& device_, |
| 176 | TextureCache& texture_cache_, BufferCache& buffer_cache_, | ||
| 177 | ProgramManager& program_manager_, StateTracker& state_tracker_) | ||
| 36 | : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, | 178 | : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, |
| 37 | emu_window{emu_window_}, gpu{gpu_}, device{device_} {} | 179 | emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, |
| 180 | buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{ | ||
| 181 | state_tracker_} {} | ||
| 38 | 182 | ||
| 39 | ShaderCache::~ShaderCache() = default; | 183 | ShaderCache::~ShaderCache() = default; |
| 40 | 184 | ||
| 185 | GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { | ||
| 186 | if (!RefreshStages(graphics_key.unique_hashes)) { | ||
| 187 | return nullptr; | ||
| 188 | } | ||
| 189 | const auto& regs{maxwell3d.regs}; | ||
| 190 | graphics_key.raw = 0; | ||
| 191 | graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); | ||
| 192 | graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0 | ||
| 193 | ? regs.draw.topology.Value() | ||
| 194 | : Maxwell::PrimitiveTopology{}); | ||
| 195 | graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value()); | ||
| 196 | graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value()); | ||
| 197 | graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); | ||
| 198 | |||
| 199 | const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; | ||
| 200 | auto& program{pair->second}; | ||
| 201 | if (is_new) { | ||
| 202 | program = CreateGraphicsProgram(); | ||
| 203 | } | ||
| 204 | return program.get(); | ||
| 205 | } | ||
| 206 | |||
| 207 | ComputeProgram* ShaderCache::CurrentComputeProgram() { | ||
| 208 | const VideoCommon::ShaderInfo* const shader{ComputeShader()}; | ||
| 209 | if (!shader) { | ||
| 210 | return nullptr; | ||
| 211 | } | ||
| 212 | const auto& qmd{kepler_compute.launch_description}; | ||
| 213 | const ComputeProgramKey key{ | ||
| 214 | .unique_hash = shader->unique_hash, | ||
| 215 | .shared_memory_size = qmd.shared_alloc, | ||
| 216 | .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, | ||
| 217 | }; | ||
| 218 | const auto [pair, is_new]{compute_cache.try_emplace(key)}; | ||
| 219 | auto& pipeline{pair->second}; | ||
| 220 | if (!is_new) { | ||
| 221 | return pipeline.get(); | ||
| 222 | } | ||
| 223 | pipeline = CreateComputeProgram(key, shader); | ||
| 224 | return pipeline.get(); | ||
| 225 | } | ||
| 226 | |||
| 227 | std::unique_ptr<GraphicsProgram> ShaderCache::CreateGraphicsProgram() { | ||
| 228 | GraphicsEnvironments environments; | ||
| 229 | GetGraphicsEnvironments(environments, graphics_key.unique_hashes); | ||
| 230 | |||
| 231 | main_pools.ReleaseContents(); | ||
| 232 | return CreateGraphicsProgram(main_pools, graphics_key, environments.Span(), true); | ||
| 233 | } | ||
| 234 | |||
| 235 | std::unique_ptr<GraphicsProgram> ShaderCache::CreateGraphicsProgram( | ||
| 236 | ShaderPools& pools, const GraphicsProgramKey& key, std::span<Shader::Environment* const> envs, | ||
| 237 | bool build_in_parallel) { | ||
| 238 | LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); | ||
| 239 | size_t env_index{0}; | ||
| 240 | std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; | ||
| 241 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||
| 242 | if (key.unique_hashes[index] == 0) { | ||
| 243 | continue; | ||
| 244 | } | ||
| 245 | Shader::Environment& env{*envs[env_index]}; | ||
| 246 | ++env_index; | ||
| 247 | |||
| 248 | const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; | ||
| 249 | Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); | ||
| 250 | programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); | ||
| 251 | } | ||
| 252 | std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; | ||
| 253 | |||
| 254 | OGLProgram gl_program; | ||
| 255 | gl_program.handle = glCreateProgram(); | ||
| 256 | |||
| 257 | Shader::Backend::SPIRV::Bindings binding; | ||
| 258 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||
| 259 | if (key.unique_hashes[index] == 0) { | ||
| 260 | continue; | ||
| 261 | } | ||
| 262 | UNIMPLEMENTED_IF(index == 0); | ||
| 263 | |||
| 264 | Shader::IR::Program& program{programs[index]}; | ||
| 265 | const size_t stage_index{index - 1}; | ||
| 266 | infos[stage_index] = &program.info; | ||
| 267 | |||
| 268 | const std::vector<u32> code{EmitSPIRV(profile, program, binding)}; | ||
| 269 | FILE* file = fopen("D:\\shader.spv", "wb"); | ||
| 270 | fwrite(code.data(), 4, code.size(), file); | ||
| 271 | fclose(file); | ||
| 272 | AddShader(Stage(stage_index), gl_program.handle, code); | ||
| 273 | } | ||
| 274 | LinkProgram(gl_program.handle); | ||
| 275 | |||
| 276 | return std::make_unique<GraphicsProgram>(texture_cache, buffer_cache, gpu_memory, maxwell3d, | ||
| 277 | program_manager, state_tracker, std::move(gl_program), | ||
| 278 | infos); | ||
| 279 | } | ||
| 280 | |||
| 281 | std::unique_ptr<ComputeProgram> ShaderCache::CreateComputeProgram( | ||
| 282 | const ComputeProgramKey& key, const VideoCommon::ShaderInfo* shader) { | ||
| 283 | const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; | ||
| 284 | const auto& qmd{kepler_compute.launch_description}; | ||
| 285 | ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; | ||
| 286 | env.SetCachedSize(shader->size_bytes); | ||
| 287 | |||
| 288 | main_pools.ReleaseContents(); | ||
| 289 | return CreateComputeProgram(main_pools, key, env, true); | ||
| 290 | } | ||
| 291 | |||
| 292 | std::unique_ptr<ComputeProgram> ShaderCache::CreateComputeProgram(ShaderPools& pools, | ||
| 293 | const ComputeProgramKey& key, | ||
| 294 | Shader::Environment& env, | ||
| 295 | bool build_in_parallel) { | ||
| 296 | LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); | ||
| 297 | |||
| 298 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; | ||
| 299 | Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; | ||
| 300 | Shader::Backend::SPIRV::Bindings binding; | ||
| 301 | const std::vector<u32> code{EmitSPIRV(profile, program, binding)}; | ||
| 302 | OGLProgram gl_program; | ||
| 303 | gl_program.handle = glCreateProgram(); | ||
| 304 | AddShader(GL_COMPUTE_SHADER, gl_program.handle, code); | ||
| 305 | LinkProgram(gl_program.handle); | ||
| 306 | return std::make_unique<ComputeProgram>(texture_cache, buffer_cache, gpu_memory, kepler_compute, | ||
| 307 | program_manager, std::move(gl_program), program.info); | ||
| 308 | } | ||
| 309 | |||
| 41 | } // namespace OpenGL | 310 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 96520e17c..b479d073a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -5,20 +5,18 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <atomic> | ||
| 9 | #include <bitset> | ||
| 10 | #include <memory> | ||
| 11 | #include <string> | ||
| 12 | #include <tuple> | ||
| 13 | #include <unordered_map> | 8 | #include <unordered_map> |
| 14 | #include <unordered_set> | ||
| 15 | #include <vector> | ||
| 16 | 9 | ||
| 17 | #include <glad/glad.h> | 10 | #include <glad/glad.h> |
| 18 | 11 | ||
| 19 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 14 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 15 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 16 | #include "shader_recompiler/object_pool.h" | ||
| 20 | #include "video_core/engines/shader_type.h" | 17 | #include "video_core/engines/shader_type.h" |
| 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 18 | #include "video_core/renderer_opengl/gl_compute_program.h" |
| 19 | #include "video_core/renderer_opengl/gl_graphics_program.h" | ||
| 22 | #include "video_core/shader_cache.h" | 20 | #include "video_core/shader_cache.h" |
| 23 | 21 | ||
| 24 | namespace Tegra { | 22 | namespace Tegra { |
| @@ -32,64 +30,62 @@ class EmuWindow; | |||
| 32 | namespace OpenGL { | 30 | namespace OpenGL { |
| 33 | 31 | ||
| 34 | class Device; | 32 | class Device; |
| 33 | class ProgramManager; | ||
| 35 | class RasterizerOpenGL; | 34 | class RasterizerOpenGL; |
| 36 | 35 | ||
| 37 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 36 | struct ShaderPools { |
| 38 | 37 | void ReleaseContents() { | |
| 39 | struct GraphicsProgramKey { | 38 | flow_block.ReleaseContents(); |
| 40 | struct TransformFeedbackState { | 39 | block.ReleaseContents(); |
| 41 | struct Layout { | 40 | inst.ReleaseContents(); |
| 42 | u32 stream; | ||
| 43 | u32 varying_count; | ||
| 44 | u32 stride; | ||
| 45 | }; | ||
| 46 | std::array<Layout, Maxwell::NumTransformFeedbackBuffers> layouts; | ||
| 47 | std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> varyings; | ||
| 48 | }; | ||
| 49 | |||
| 50 | std::array<u64, 6> unique_hashes; | ||
| 51 | std::array<u8, Maxwell::NumRenderTargets> color_formats; | ||
| 52 | union { | ||
| 53 | u32 raw; | ||
| 54 | BitField<0, 1, u32> xfb_enabled; | ||
| 55 | BitField<1, 1, u32> early_z; | ||
| 56 | BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; | ||
| 57 | BitField<6, 2, u32> tessellation_primitive; | ||
| 58 | BitField<8, 2, u32> tessellation_spacing; | ||
| 59 | BitField<10, 1, u32> tessellation_clockwise; | ||
| 60 | }; | ||
| 61 | u32 padding; | ||
| 62 | TransformFeedbackState xfb_state; | ||
| 63 | |||
| 64 | [[nodiscard]] size_t Size() const noexcept { | ||
| 65 | if (xfb_enabled != 0) { | ||
| 66 | return sizeof(GraphicsProgramKey); | ||
| 67 | } else { | ||
| 68 | return offsetof(GraphicsProgramKey, padding); | ||
| 69 | } | ||
| 70 | } | 41 | } |
| 71 | }; | ||
| 72 | static_assert(std::has_unique_object_representations_v<GraphicsProgramKey>); | ||
| 73 | static_assert(std::is_trivially_copyable_v<GraphicsProgramKey>); | ||
| 74 | static_assert(std::is_trivially_constructible_v<GraphicsProgramKey>); | ||
| 75 | 42 | ||
| 76 | class GraphicsProgram { | 43 | Shader::ObjectPool<Shader::IR::Inst> inst; |
| 77 | public: | 44 | Shader::ObjectPool<Shader::IR::Block> block; |
| 78 | private: | 45 | Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; |
| 79 | }; | 46 | }; |
| 80 | 47 | ||
| 81 | class ShaderCache : public VideoCommon::ShaderCache { | 48 | class ShaderCache : public VideoCommon::ShaderCache { |
| 82 | public: | 49 | public: |
| 83 | explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, | 50 | explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, |
| 84 | Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, | 51 | Tegra::Engines::Maxwell3D& maxwell3d_, |
| 85 | Tegra::Engines::KeplerCompute& kepler_compute_, | 52 | Tegra::Engines::KeplerCompute& kepler_compute_, |
| 86 | Tegra::MemoryManager& gpu_memory_, const Device& device_); | 53 | Tegra::MemoryManager& gpu_memory_, const Device& device_, |
| 54 | TextureCache& texture_cache_, BufferCache& buffer_cache_, | ||
| 55 | ProgramManager& program_manager_, StateTracker& state_tracker_); | ||
| 87 | ~ShaderCache(); | 56 | ~ShaderCache(); |
| 88 | 57 | ||
| 58 | [[nodiscard]] GraphicsProgram* CurrentGraphicsProgram(); | ||
| 59 | |||
| 60 | [[nodiscard]] ComputeProgram* CurrentComputeProgram(); | ||
| 61 | |||
| 89 | private: | 62 | private: |
| 63 | std::unique_ptr<GraphicsProgram> CreateGraphicsProgram(); | ||
| 64 | |||
| 65 | std::unique_ptr<GraphicsProgram> CreateGraphicsProgram( | ||
| 66 | ShaderPools& pools, const GraphicsProgramKey& key, | ||
| 67 | std::span<Shader::Environment* const> envs, bool build_in_parallel); | ||
| 68 | |||
| 69 | std::unique_ptr<ComputeProgram> CreateComputeProgram(const ComputeProgramKey& key, | ||
| 70 | const VideoCommon::ShaderInfo* shader); | ||
| 71 | |||
| 72 | std::unique_ptr<ComputeProgram> CreateComputeProgram(ShaderPools& pools, | ||
| 73 | const ComputeProgramKey& key, | ||
| 74 | Shader::Environment& env, | ||
| 75 | bool build_in_parallel); | ||
| 76 | |||
| 90 | Core::Frontend::EmuWindow& emu_window; | 77 | Core::Frontend::EmuWindow& emu_window; |
| 91 | Tegra::GPU& gpu; | ||
| 92 | const Device& device; | 78 | const Device& device; |
| 79 | TextureCache& texture_cache; | ||
| 80 | BufferCache& buffer_cache; | ||
| 81 | ProgramManager& program_manager; | ||
| 82 | StateTracker& state_tracker; | ||
| 83 | |||
| 84 | GraphicsProgramKey graphics_key{}; | ||
| 85 | |||
| 86 | ShaderPools main_pools; | ||
| 87 | std::unordered_map<GraphicsProgramKey, std::unique_ptr<GraphicsProgram>> graphics_cache; | ||
| 88 | std::unordered_map<ComputeProgramKey, std::unique_ptr<ComputeProgram>> compute_cache; | ||
| 93 | }; | 89 | }; |
| 94 | 90 | ||
| 95 | } // namespace OpenGL | 91 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 553e6e8d6..399959afb 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp | |||
| @@ -1,149 +1,3 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | 1 | // Copyright 2018 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | |||
| 5 | #include <glad/glad.h> | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "video_core/engines/maxwell_3d.h" | ||
| 9 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||
| 11 | |||
| 12 | namespace OpenGL { | ||
| 13 | |||
| 14 | namespace { | ||
| 15 | |||
| 16 | void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) { | ||
| 17 | if (current == old) { | ||
| 18 | return; | ||
| 19 | } | ||
| 20 | if (current == 0) { | ||
| 21 | if (enabled) { | ||
| 22 | enabled = false; | ||
| 23 | glDisable(stage); | ||
| 24 | } | ||
| 25 | return; | ||
| 26 | } | ||
| 27 | if (!enabled) { | ||
| 28 | enabled = true; | ||
| 29 | glEnable(stage); | ||
| 30 | } | ||
| 31 | glBindProgramARB(stage, current); | ||
| 32 | } | ||
| 33 | |||
| 34 | } // Anonymous namespace | ||
| 35 | |||
| 36 | ProgramManager::ProgramManager(const Device& device) | ||
| 37 | : use_assembly_programs{device.UseAssemblyShaders()} { | ||
| 38 | if (use_assembly_programs) { | ||
| 39 | glEnable(GL_COMPUTE_PROGRAM_NV); | ||
| 40 | } else { | ||
| 41 | graphics_pipeline.Create(); | ||
| 42 | glBindProgramPipeline(graphics_pipeline.handle); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | ProgramManager::~ProgramManager() = default; | ||
| 47 | |||
| 48 | void ProgramManager::BindCompute(GLuint program) { | ||
| 49 | if (use_assembly_programs) { | ||
| 50 | glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); | ||
| 51 | } else { | ||
| 52 | is_graphics_bound = false; | ||
| 53 | glUseProgram(program); | ||
| 54 | } | ||
| 55 | } | ||
| 56 | |||
| 57 | void ProgramManager::BindGraphicsPipeline() { | ||
| 58 | if (!use_assembly_programs) { | ||
| 59 | UpdateSourcePrograms(); | ||
| 60 | } | ||
| 61 | } | ||
| 62 | |||
| 63 | void ProgramManager::BindHostPipeline(GLuint pipeline) { | ||
| 64 | if (use_assembly_programs) { | ||
| 65 | if (geometry_enabled) { | ||
| 66 | geometry_enabled = false; | ||
| 67 | old_state.geometry = 0; | ||
| 68 | glDisable(GL_GEOMETRY_PROGRAM_NV); | ||
| 69 | } | ||
| 70 | } else { | ||
| 71 | if (!is_graphics_bound) { | ||
| 72 | glUseProgram(0); | ||
| 73 | } | ||
| 74 | } | ||
| 75 | glBindProgramPipeline(pipeline); | ||
| 76 | } | ||
| 77 | |||
| 78 | void ProgramManager::RestoreGuestPipeline() { | ||
| 79 | if (use_assembly_programs) { | ||
| 80 | glBindProgramPipeline(0); | ||
| 81 | } else { | ||
| 82 | glBindProgramPipeline(graphics_pipeline.handle); | ||
| 83 | } | ||
| 84 | } | ||
| 85 | |||
| 86 | void ProgramManager::BindHostCompute(GLuint program) { | ||
| 87 | if (use_assembly_programs) { | ||
| 88 | glDisable(GL_COMPUTE_PROGRAM_NV); | ||
| 89 | } | ||
| 90 | glUseProgram(program); | ||
| 91 | is_graphics_bound = false; | ||
| 92 | } | ||
| 93 | |||
| 94 | void ProgramManager::RestoreGuestCompute() { | ||
| 95 | if (use_assembly_programs) { | ||
| 96 | glEnable(GL_COMPUTE_PROGRAM_NV); | ||
| 97 | glUseProgram(0); | ||
| 98 | } | ||
| 99 | } | ||
| 100 | |||
| 101 | void ProgramManager::UseVertexShader(GLuint program) { | ||
| 102 | if (use_assembly_programs) { | ||
| 103 | BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); | ||
| 104 | } | ||
| 105 | current_state.vertex = program; | ||
| 106 | } | ||
| 107 | |||
| 108 | void ProgramManager::UseGeometryShader(GLuint program) { | ||
| 109 | if (use_assembly_programs) { | ||
| 110 | BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled); | ||
| 111 | } | ||
| 112 | current_state.geometry = program; | ||
| 113 | } | ||
| 114 | |||
| 115 | void ProgramManager::UseFragmentShader(GLuint program) { | ||
| 116 | if (use_assembly_programs) { | ||
| 117 | BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled); | ||
| 118 | } | ||
| 119 | current_state.fragment = program; | ||
| 120 | } | ||
| 121 | |||
| 122 | void ProgramManager::UpdateSourcePrograms() { | ||
| 123 | if (!is_graphics_bound) { | ||
| 124 | is_graphics_bound = true; | ||
| 125 | glUseProgram(0); | ||
| 126 | } | ||
| 127 | |||
| 128 | const GLuint handle = graphics_pipeline.handle; | ||
| 129 | const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) { | ||
| 130 | if (current == old) { | ||
| 131 | return; | ||
| 132 | } | ||
| 133 | glUseProgramStages(handle, stage, current); | ||
| 134 | }; | ||
| 135 | update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex); | ||
| 136 | update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry); | ||
| 137 | update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment); | ||
| 138 | |||
| 139 | old_state = current_state; | ||
| 140 | } | ||
| 141 | |||
| 142 | void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { | ||
| 143 | const auto& regs = maxwell.regs; | ||
| 144 | |||
| 145 | // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. | ||
| 146 | y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; | ||
| 147 | } | ||
| 148 | |||
| 149 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index ad42cce74..70781d6f5 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h | |||
| @@ -4,79 +4,24 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <cstddef> | ||
| 8 | |||
| 9 | #include <glad/glad.h> | 7 | #include <glad/glad.h> |
| 10 | 8 | ||
| 11 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 12 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | ||
| 13 | |||
| 14 | namespace OpenGL { | 9 | namespace OpenGL { |
| 15 | 10 | ||
| 16 | class Device; | ||
| 17 | |||
| 18 | /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned | ||
| 19 | /// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at | ||
| 20 | /// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. | ||
| 21 | /// Not following that rule will cause problems on some AMD drivers. | ||
| 22 | struct alignas(16) MaxwellUniformData { | ||
| 23 | void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell); | ||
| 24 | |||
| 25 | GLfloat y_direction; | ||
| 26 | }; | ||
| 27 | static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect"); | ||
| 28 | static_assert(sizeof(MaxwellUniformData) < 16384, | ||
| 29 | "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); | ||
| 30 | |||
| 31 | class ProgramManager { | 11 | class ProgramManager { |
| 32 | public: | 12 | public: |
| 33 | explicit ProgramManager(const Device& device); | 13 | void BindProgram(GLuint program) { |
| 34 | ~ProgramManager(); | 14 | if (bound_program == program) { |
| 35 | 15 | return; | |
| 36 | /// Binds a compute program | 16 | } |
| 37 | void BindCompute(GLuint program); | 17 | bound_program = program; |
| 38 | 18 | glUseProgram(program); | |
| 39 | /// Updates bound programs. | 19 | } |
| 40 | void BindGraphicsPipeline(); | ||
| 41 | |||
| 42 | /// Binds an OpenGL pipeline object unsynchronized with the guest state. | ||
| 43 | void BindHostPipeline(GLuint pipeline); | ||
| 44 | 20 | ||
| 45 | /// Rewinds BindHostPipeline state changes. | 21 | void RestoreGuestCompute() {} |
| 46 | void RestoreGuestPipeline(); | ||
| 47 | |||
| 48 | /// Binds an OpenGL GLSL program object unsynchronized with the guest state. | ||
| 49 | void BindHostCompute(GLuint program); | ||
| 50 | |||
| 51 | /// Rewinds BindHostCompute state changes. | ||
| 52 | void RestoreGuestCompute(); | ||
| 53 | |||
| 54 | void UseVertexShader(GLuint program); | ||
| 55 | void UseGeometryShader(GLuint program); | ||
| 56 | void UseFragmentShader(GLuint program); | ||
| 57 | 22 | ||
| 58 | private: | 23 | private: |
| 59 | struct PipelineState { | 24 | GLuint bound_program = 0; |
| 60 | GLuint vertex = 0; | ||
| 61 | GLuint geometry = 0; | ||
| 62 | GLuint fragment = 0; | ||
| 63 | }; | ||
| 64 | |||
| 65 | /// Update GLSL programs. | ||
| 66 | void UpdateSourcePrograms(); | ||
| 67 | |||
| 68 | OGLPipeline graphics_pipeline; | ||
| 69 | |||
| 70 | PipelineState current_state; | ||
| 71 | PipelineState old_state; | ||
| 72 | |||
| 73 | bool use_assembly_programs = false; | ||
| 74 | |||
| 75 | bool is_graphics_bound = true; | ||
| 76 | |||
| 77 | bool vertex_enabled = false; | ||
| 78 | bool geometry_enabled = false; | ||
| 79 | bool fragment_enabled = false; | ||
| 80 | }; | 25 | }; |
| 81 | 26 | ||
| 82 | } // namespace OpenGL | 27 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index a8bf84218..7053be161 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -24,9 +24,7 @@ | |||
| 24 | #include "video_core/textures/decoders.h" | 24 | #include "video_core/textures/decoders.h" |
| 25 | 25 | ||
| 26 | namespace OpenGL { | 26 | namespace OpenGL { |
| 27 | |||
| 28 | namespace { | 27 | namespace { |
| 29 | |||
| 30 | using Tegra::Texture::SwizzleSource; | 28 | using Tegra::Texture::SwizzleSource; |
| 31 | using Tegra::Texture::TextureMipmapFilter; | 29 | using Tegra::Texture::TextureMipmapFilter; |
| 32 | using Tegra::Texture::TextureType; | 30 | using Tegra::Texture::TextureType; |
| @@ -59,107 +57,6 @@ struct CopyRegion { | |||
| 59 | GLsizei depth; | 57 | GLsizei depth; |
| 60 | }; | 58 | }; |
| 61 | 59 | ||
| 62 | struct FormatTuple { | ||
| 63 | GLenum internal_format; | ||
| 64 | GLenum format = GL_NONE; | ||
| 65 | GLenum type = GL_NONE; | ||
| 66 | }; | ||
| 67 | |||
| 68 | constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{ | ||
| 69 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM | ||
| 70 | {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM | ||
| 71 | {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT | ||
| 72 | {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT | ||
| 73 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM | ||
| 74 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM | ||
| 75 | {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM | ||
| 76 | {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM | ||
| 77 | {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT | ||
| 78 | {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM | ||
| 79 | {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM | ||
| 80 | {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM | ||
| 81 | {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT | ||
| 82 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT | ||
| 83 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT | ||
| 84 | {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM | ||
| 85 | {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM | ||
| 86 | {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT | ||
| 87 | {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT | ||
| 88 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT | ||
| 89 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT | ||
| 90 | {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM | ||
| 91 | {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM | ||
| 92 | {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM | ||
| 93 | {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM | ||
| 94 | {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM | ||
| 95 | {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM | ||
| 96 | {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM | ||
| 97 | {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM | ||
| 98 | {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT | ||
| 99 | {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT | ||
| 100 | {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM | ||
| 101 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM | ||
| 102 | {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT | ||
| 103 | {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT | ||
| 104 | {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT | ||
| 105 | {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT | ||
| 106 | {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT | ||
| 107 | {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT | ||
| 108 | {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM | ||
| 109 | {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM | ||
| 110 | {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT | ||
| 111 | {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT | ||
| 112 | {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM | ||
| 113 | {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT | ||
| 114 | {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT | ||
| 115 | {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT | ||
| 116 | {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM | ||
| 117 | {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT | ||
| 118 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB | ||
| 119 | {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM | ||
| 120 | {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM | ||
| 121 | {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT | ||
| 122 | {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT | ||
| 123 | {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT | ||
| 124 | {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT | ||
| 125 | {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT | ||
| 126 | {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT | ||
| 127 | {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM | ||
| 128 | {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM | ||
| 129 | {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM | ||
| 130 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB | ||
| 131 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB | ||
| 132 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB | ||
| 133 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB | ||
| 134 | {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB | ||
| 135 | {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM | ||
| 136 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB | ||
| 137 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB | ||
| 138 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB | ||
| 139 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB | ||
| 140 | {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM | ||
| 141 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB | ||
| 142 | {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM | ||
| 143 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB | ||
| 144 | {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM | ||
| 145 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB | ||
| 146 | {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM | ||
| 147 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB | ||
| 148 | {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM | ||
| 149 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB | ||
| 150 | {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM | ||
| 151 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB | ||
| 152 | {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM | ||
| 153 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB | ||
| 154 | {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT | ||
| 155 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT | ||
| 156 | {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM | ||
| 157 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT | ||
| 158 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM | ||
| 159 | {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, | ||
| 160 | GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT | ||
| 161 | }}; | ||
| 162 | |||
| 163 | constexpr std::array ACCELERATED_FORMATS{ | 60 | constexpr std::array ACCELERATED_FORMATS{ |
| 164 | GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, | 61 | GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F, |
| 165 | GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, | 62 | GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI, |
| @@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{ | |||
| 170 | GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, | 67 | GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, |
| 171 | }; | 68 | }; |
| 172 | 69 | ||
| 173 | const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { | ||
| 174 | ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size()); | ||
| 175 | return FORMAT_TABLE[static_cast<size_t>(pixel_format)]; | ||
| 176 | } | ||
| 177 | |||
| 178 | GLenum ImageTarget(const VideoCommon::ImageInfo& info) { | 70 | GLenum ImageTarget(const VideoCommon::ImageInfo& info) { |
| 179 | switch (info.type) { | 71 | switch (info.type) { |
| 180 | case ImageType::e1D: | 72 | case ImageType::e1D: |
| @@ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) { | |||
| 195 | return GL_NONE; | 87 | return GL_NONE; |
| 196 | } | 88 | } |
| 197 | 89 | ||
| 198 | GLenum ImageTarget(ImageViewType type, int num_samples = 1) { | 90 | GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) { |
| 199 | const bool is_multisampled = num_samples > 1; | 91 | const bool is_multisampled = num_samples > 1; |
| 200 | switch (type) { | 92 | switch (type) { |
| 201 | case ImageViewType::e1D: | 93 | case Shader::TextureType::Color1D: |
| 202 | return GL_TEXTURE_1D; | 94 | return GL_TEXTURE_1D; |
| 203 | case ImageViewType::e2D: | 95 | case Shader::TextureType::Color2D: |
| 204 | return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; | 96 | return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; |
| 205 | case ImageViewType::Cube: | 97 | case Shader::TextureType::ColorCube: |
| 206 | return GL_TEXTURE_CUBE_MAP; | 98 | return GL_TEXTURE_CUBE_MAP; |
| 207 | case ImageViewType::e3D: | 99 | case Shader::TextureType::Color3D: |
| 208 | return GL_TEXTURE_3D; | 100 | return GL_TEXTURE_3D; |
| 209 | case ImageViewType::e1DArray: | 101 | case Shader::TextureType::ColorArray1D: |
| 210 | return GL_TEXTURE_1D_ARRAY; | 102 | return GL_TEXTURE_1D_ARRAY; |
| 211 | case ImageViewType::e2DArray: | 103 | case Shader::TextureType::ColorArray2D: |
| 212 | return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; | 104 | return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; |
| 213 | case ImageViewType::CubeArray: | 105 | case Shader::TextureType::ColorArrayCube: |
| 214 | return GL_TEXTURE_CUBE_MAP_ARRAY; | 106 | return GL_TEXTURE_CUBE_MAP_ARRAY; |
| 215 | case ImageViewType::Rect: | 107 | case Shader::TextureType::Buffer: |
| 216 | return GL_TEXTURE_RECTANGLE; | ||
| 217 | case ImageViewType::Buffer: | ||
| 218 | return GL_TEXTURE_BUFFER; | 108 | return GL_TEXTURE_BUFFER; |
| 219 | } | 109 | } |
| 220 | UNREACHABLE_MSG("Invalid image view type={}", type); | 110 | UNREACHABLE_MSG("Invalid image view type={}", type); |
| @@ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 | |||
| 322 | default: | 212 | default: |
| 323 | return false; | 213 | return false; |
| 324 | } | 214 | } |
| 325 | const GLenum internal_format = GetFormatTuple(info.format).internal_format; | 215 | const GLenum internal_format = MaxwellToGL::GetFormatTuple(info.format).internal_format; |
| 326 | const auto& format_info = runtime.FormatInfo(info.type, internal_format); | 216 | const auto& format_info = runtime.FormatInfo(info.type, internal_format); |
| 327 | if (format_info.is_compressed) { | 217 | if (format_info.is_compressed) { |
| 328 | return false; | 218 | return false; |
| @@ -414,11 +304,10 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 | |||
| 414 | 304 | ||
| 415 | void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { | 305 | void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { |
| 416 | if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { | 306 | if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { |
| 417 | const GLuint texture = image_view->DefaultHandle(); | 307 | glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0); |
| 418 | glNamedFramebufferTexture(fbo, attachment, texture, 0); | ||
| 419 | return; | 308 | return; |
| 420 | } | 309 | } |
| 421 | const GLuint texture = image_view->Handle(ImageViewType::e3D); | 310 | const GLuint texture = image_view->Handle(Shader::TextureType::Color3D); |
| 422 | if (image_view->range.extent.layers > 1) { | 311 | if (image_view->range.extent.layers > 1) { |
| 423 | // TODO: OpenGL doesn't support rendering to a fixed number of slices | 312 | // TODO: OpenGL doesn't support rendering to a fixed number of slices |
| 424 | glNamedFramebufferTexture(fbo, attachment, texture, 0); | 313 | glNamedFramebufferTexture(fbo, attachment, texture, 0); |
| @@ -453,7 +342,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& | |||
| 453 | static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; | 342 | static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; |
| 454 | for (size_t i = 0; i < TARGETS.size(); ++i) { | 343 | for (size_t i = 0; i < TARGETS.size(); ++i) { |
| 455 | const GLenum target = TARGETS[i]; | 344 | const GLenum target = TARGETS[i]; |
| 456 | for (const FormatTuple& tuple : FORMAT_TABLE) { | 345 | for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) { |
| 457 | const GLenum format = tuple.internal_format; | 346 | const GLenum format = tuple.internal_format; |
| 458 | GLint compat_class; | 347 | GLint compat_class; |
| 459 | GLint compat_type; | 348 | GLint compat_type; |
| @@ -475,11 +364,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& | |||
| 475 | null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); | 364 | null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); |
| 476 | null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); | 365 | null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); |
| 477 | null_image_3d.Create(GL_TEXTURE_3D); | 366 | null_image_3d.Create(GL_TEXTURE_3D); |
| 478 | null_image_rect.Create(GL_TEXTURE_RECTANGLE); | ||
| 479 | glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); | 367 | glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); |
| 480 | glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); | 368 | glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); |
| 481 | glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); | 369 | glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); |
| 482 | glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1); | ||
| 483 | 370 | ||
| 484 | std::array<GLuint, 4> new_handles; | 371 | std::array<GLuint, 4> new_handles; |
| 485 | glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data()); | 372 | glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data()); |
| @@ -496,29 +383,28 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& | |||
| 496 | glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, | 383 | glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle, |
| 497 | GL_R8, 0, 1, 0, 6); | 384 | GL_R8, 0, 1, 0, 6); |
| 498 | const std::array texture_handles{ | 385 | const std::array texture_handles{ |
| 499 | null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, | 386 | null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle, |
| 500 | null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle, | 387 | null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle, |
| 501 | null_image_view_2d_array.handle, null_image_view_cube.handle, | 388 | null_image_view_cube.handle, |
| 502 | }; | 389 | }; |
| 503 | for (const GLuint handle : texture_handles) { | 390 | for (const GLuint handle : texture_handles) { |
| 504 | static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; | 391 | static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; |
| 505 | glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); | 392 | glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); |
| 506 | } | 393 | } |
| 507 | const auto set_view = [this](ImageViewType type, GLuint handle) { | 394 | const auto set_view = [this](Shader::TextureType type, GLuint handle) { |
| 508 | if (device.HasDebuggingToolAttached()) { | 395 | if (device.HasDebuggingToolAttached()) { |
| 509 | const std::string name = fmt::format("NullImage {}", type); | 396 | const std::string name = fmt::format("NullImage {}", type); |
| 510 | glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); | 397 | glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); |
| 511 | } | 398 | } |
| 512 | null_image_views[static_cast<size_t>(type)] = handle; | 399 | null_image_views[static_cast<size_t>(type)] = handle; |
| 513 | }; | 400 | }; |
| 514 | set_view(ImageViewType::e1D, null_image_view_1d.handle); | 401 | set_view(Shader::TextureType::Color1D, null_image_view_1d.handle); |
| 515 | set_view(ImageViewType::e2D, null_image_view_2d.handle); | 402 | set_view(Shader::TextureType::Color2D, null_image_view_2d.handle); |
| 516 | set_view(ImageViewType::Cube, null_image_view_cube.handle); | 403 | set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle); |
| 517 | set_view(ImageViewType::e3D, null_image_3d.handle); | 404 | set_view(Shader::TextureType::Color3D, null_image_3d.handle); |
| 518 | set_view(ImageViewType::e1DArray, null_image_1d_array.handle); | 405 | set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle); |
| 519 | set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); | 406 | set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle); |
| 520 | set_view(ImageViewType::CubeArray, null_image_cube_array.handle); | 407 | set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle); |
| 521 | set_view(ImageViewType::Rect, null_image_rect.handle); | ||
| 522 | } | 408 | } |
| 523 | 409 | ||
| 524 | TextureCacheRuntime::~TextureCacheRuntime() = default; | 410 | TextureCacheRuntime::~TextureCacheRuntime() = default; |
| @@ -710,7 +596,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, | |||
| 710 | gl_format = GL_RGBA; | 596 | gl_format = GL_RGBA; |
| 711 | gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; | 597 | gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; |
| 712 | } else { | 598 | } else { |
| 713 | const auto& tuple = GetFormatTuple(info.format); | 599 | const auto& tuple = MaxwellToGL::GetFormatTuple(info.format); |
| 714 | gl_internal_format = tuple.internal_format; | 600 | gl_internal_format = tuple.internal_format; |
| 715 | gl_format = tuple.format; | 601 | gl_format = tuple.format; |
| 716 | gl_type = tuple.type; | 602 | gl_type = tuple.type; |
| @@ -750,8 +636,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, | |||
| 750 | glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); | 636 | glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); |
| 751 | break; | 637 | break; |
| 752 | case GL_TEXTURE_BUFFER: | 638 | case GL_TEXTURE_BUFFER: |
| 753 | buffer.Create(); | 639 | UNREACHABLE(); |
| 754 | glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0); | ||
| 755 | break; | 640 | break; |
| 756 | default: | 641 | default: |
| 757 | UNREACHABLE_MSG("Invalid target=0x{:x}", target); | 642 | UNREACHABLE_MSG("Invalid target=0x{:x}", target); |
| @@ -789,14 +674,6 @@ void Image::UploadMemory(const ImageBufferMap& map, | |||
| 789 | } | 674 | } |
| 790 | } | 675 | } |
| 791 | 676 | ||
| 792 | void Image::UploadMemory(const ImageBufferMap& map, | ||
| 793 | std::span<const VideoCommon::BufferCopy> copies) { | ||
| 794 | for (const VideoCommon::BufferCopy& copy : copies) { | ||
| 795 | glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset, | ||
| 796 | copy.dst_offset, copy.size); | ||
| 797 | } | ||
| 798 | } | ||
| 799 | |||
| 800 | void Image::DownloadMemory(ImageBufferMap& map, | 677 | void Image::DownloadMemory(ImageBufferMap& map, |
| 801 | std::span<const VideoCommon::BufferImageCopy> copies) { | 678 | std::span<const VideoCommon::BufferImageCopy> copies) { |
| 802 | glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API | 679 | glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API |
| @@ -958,7 +835,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||
| 958 | if (True(image.flags & ImageFlagBits::Converted)) { | 835 | if (True(image.flags & ImageFlagBits::Converted)) { |
| 959 | internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; | 836 | internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; |
| 960 | } else { | 837 | } else { |
| 961 | internal_format = GetFormatTuple(format).internal_format; | 838 | internal_format = MaxwellToGL::GetFormatTuple(format).internal_format; |
| 962 | } | 839 | } |
| 963 | VideoCommon::SubresourceRange flatten_range = info.range; | 840 | VideoCommon::SubresourceRange flatten_range = info.range; |
| 964 | std::array<GLuint, 2> handles; | 841 | std::array<GLuint, 2> handles; |
| @@ -970,8 +847,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||
| 970 | [[fallthrough]]; | 847 | [[fallthrough]]; |
| 971 | case ImageViewType::e1D: | 848 | case ImageViewType::e1D: |
| 972 | glGenTextures(2, handles.data()); | 849 | glGenTextures(2, handles.data()); |
| 973 | SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); | 850 | SetupView(device, image, Shader::TextureType::Color1D, handles[0], info, flatten_range); |
| 974 | SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range); | 851 | SetupView(device, image, Shader::TextureType::ColorArray1D, handles[1], info, info.range); |
| 975 | break; | 852 | break; |
| 976 | case ImageViewType::e2DArray: | 853 | case ImageViewType::e2DArray: |
| 977 | flatten_range.extent.layers = 1; | 854 | flatten_range.extent.layers = 1; |
| @@ -985,62 +862,84 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||
| 985 | .extent = {.levels = 1, .layers = 1}, | 862 | .extent = {.levels = 1, .layers = 1}, |
| 986 | }; | 863 | }; |
| 987 | glGenTextures(1, handles.data()); | 864 | glGenTextures(1, handles.data()); |
| 988 | SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); | 865 | SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, slice_range); |
| 989 | break; | 866 | } else { |
| 867 | glGenTextures(2, handles.data()); | ||
| 868 | SetupView(device, image, Shader::TextureType::Color2D, handles[0], info, flatten_range); | ||
| 869 | SetupView(device, image, Shader::TextureType::ColorArray2D, handles[1], info, | ||
| 870 | info.range); | ||
| 990 | } | 871 | } |
| 991 | glGenTextures(2, handles.data()); | ||
| 992 | SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range); | ||
| 993 | SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range); | ||
| 994 | break; | 872 | break; |
| 995 | case ImageViewType::e3D: | 873 | case ImageViewType::e3D: |
| 996 | glGenTextures(1, handles.data()); | 874 | glGenTextures(1, handles.data()); |
| 997 | SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range); | 875 | SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, info.range); |
| 998 | break; | 876 | break; |
| 999 | case ImageViewType::CubeArray: | 877 | case ImageViewType::CubeArray: |
| 1000 | flatten_range.extent.layers = 6; | 878 | flatten_range.extent.layers = 6; |
| 1001 | [[fallthrough]]; | 879 | [[fallthrough]]; |
| 1002 | case ImageViewType::Cube: | 880 | case ImageViewType::Cube: |
| 1003 | glGenTextures(2, handles.data()); | 881 | glGenTextures(2, handles.data()); |
| 1004 | SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); | 882 | SetupView(device, image, Shader::TextureType::ColorCube, handles[0], info, flatten_range); |
| 1005 | SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range); | 883 | SetupView(device, image, Shader::TextureType::ColorArrayCube, handles[1], info, info.range); |
| 1006 | break; | 884 | break; |
| 1007 | case ImageViewType::Rect: | 885 | case ImageViewType::Rect: |
| 1008 | glGenTextures(1, handles.data()); | 886 | UNIMPLEMENTED(); |
| 1009 | SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range); | ||
| 1010 | break; | 887 | break; |
| 1011 | case ImageViewType::Buffer: | 888 | case ImageViewType::Buffer: |
| 1012 | glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); | 889 | UNREACHABLE(); |
| 1013 | SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); | 890 | break; |
| 891 | } | ||
| 892 | switch (info.type) { | ||
| 893 | case ImageViewType::e1D: | ||
| 894 | default_handle = Handle(Shader::TextureType::Color1D); | ||
| 895 | break; | ||
| 896 | case ImageViewType::e1DArray: | ||
| 897 | default_handle = Handle(Shader::TextureType::ColorArray1D); | ||
| 898 | break; | ||
| 899 | case ImageViewType::e2D: | ||
| 900 | default_handle = Handle(Shader::TextureType::Color2D); | ||
| 901 | break; | ||
| 902 | case ImageViewType::e2DArray: | ||
| 903 | default_handle = Handle(Shader::TextureType::ColorArray2D); | ||
| 904 | break; | ||
| 905 | case ImageViewType::e3D: | ||
| 906 | default_handle = Handle(Shader::TextureType::Color3D); | ||
| 907 | break; | ||
| 908 | case ImageViewType::Cube: | ||
| 909 | default_handle = Handle(Shader::TextureType::ColorCube); | ||
| 910 | break; | ||
| 911 | case ImageViewType::CubeArray: | ||
| 912 | default_handle = Handle(Shader::TextureType::ColorArrayCube); | ||
| 913 | break; | ||
| 914 | default: | ||
| 1014 | break; | 915 | break; |
| 1015 | } | 916 | } |
| 1016 | default_handle = Handle(info.type); | ||
| 1017 | } | 917 | } |
| 1018 | 918 | ||
| 1019 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | 919 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, |
| 920 | const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) | ||
| 921 | : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, | ||
| 922 | buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} | ||
| 923 | |||
| 924 | ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | ||
| 1020 | const VideoCommon::ImageViewInfo& view_info) | 925 | const VideoCommon::ImageViewInfo& view_info) |
| 1021 | : VideoCommon::ImageViewBase{info, view_info} {} | 926 | : VideoCommon::ImageViewBase{info, view_info} {} |
| 1022 | 927 | ||
| 1023 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) | 928 | ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) |
| 1024 | : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} | 929 | : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} |
| 1025 | 930 | ||
| 1026 | void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, | 931 | void ImageView::SetupView(const Device& device, Image& image, Shader::TextureType view_type, |
| 1027 | GLuint handle, const VideoCommon::ImageViewInfo& info, | 932 | GLuint handle, const VideoCommon::ImageViewInfo& info, |
| 1028 | VideoCommon::SubresourceRange view_range) { | 933 | VideoCommon::SubresourceRange view_range) { |
| 1029 | if (info.type == ImageViewType::Buffer) { | 934 | const GLuint parent = image.texture.handle; |
| 1030 | // TODO: Take offset from buffer cache | 935 | const GLenum target = ImageTarget(view_type, image.info.num_samples); |
| 1031 | glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, | 936 | glTextureView(handle, target, parent, internal_format, view_range.base.level, |
| 1032 | image.guest_size_bytes); | 937 | view_range.extent.levels, view_range.base.layer, view_range.extent.layers); |
| 1033 | } else { | 938 | if (!info.IsRenderTarget()) { |
| 1034 | const GLuint parent = image.texture.handle; | 939 | ApplySwizzle(handle, format, info.Swizzle()); |
| 1035 | const GLenum target = ImageTarget(view_type, image.info.num_samples); | ||
| 1036 | glTextureView(handle, target, parent, internal_format, view_range.base.level, | ||
| 1037 | view_range.extent.levels, view_range.base.layer, view_range.extent.layers); | ||
| 1038 | if (!info.IsRenderTarget()) { | ||
| 1039 | ApplySwizzle(handle, format, info.Swizzle()); | ||
| 1040 | } | ||
| 1041 | } | 940 | } |
| 1042 | if (device.HasDebuggingToolAttached()) { | 941 | if (device.HasDebuggingToolAttached()) { |
| 1043 | const std::string name = VideoCommon::Name(*this, view_type); | 942 | const std::string name = VideoCommon::Name(*this); |
| 1044 | glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); | 943 | glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); |
| 1045 | } | 944 | } |
| 1046 | stored_views.emplace_back().handle = handle; | 945 | stored_views.emplace_back().handle = handle; |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 817b0e650..2e3e02b79 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | 9 | ||
| 10 | #include <glad/glad.h> | 10 | #include <glad/glad.h> |
| 11 | 11 | ||
| 12 | #include "shader_recompiler/shader_info.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 13 | #include "video_core/renderer_opengl/util_shaders.h" | 14 | #include "video_core/renderer_opengl/util_shaders.h" |
| 14 | #include "video_core/texture_cache/texture_cache.h" | 15 | #include "video_core/texture_cache/texture_cache.h" |
| @@ -127,13 +128,12 @@ private: | |||
| 127 | OGLTexture null_image_1d_array; | 128 | OGLTexture null_image_1d_array; |
| 128 | OGLTexture null_image_cube_array; | 129 | OGLTexture null_image_cube_array; |
| 129 | OGLTexture null_image_3d; | 130 | OGLTexture null_image_3d; |
| 130 | OGLTexture null_image_rect; | ||
| 131 | OGLTextureView null_image_view_1d; | 131 | OGLTextureView null_image_view_1d; |
| 132 | OGLTextureView null_image_view_2d; | 132 | OGLTextureView null_image_view_2d; |
| 133 | OGLTextureView null_image_view_2d_array; | 133 | OGLTextureView null_image_view_2d_array; |
| 134 | OGLTextureView null_image_view_cube; | 134 | OGLTextureView null_image_view_cube; |
| 135 | 135 | ||
| 136 | std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views; | 136 | std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{}; |
| 137 | }; | 137 | }; |
| 138 | 138 | ||
| 139 | class Image : public VideoCommon::ImageBase { | 139 | class Image : public VideoCommon::ImageBase { |
| @@ -154,8 +154,6 @@ public: | |||
| 154 | void UploadMemory(const ImageBufferMap& map, | 154 | void UploadMemory(const ImageBufferMap& map, |
| 155 | std::span<const VideoCommon::BufferImageCopy> copies); | 155 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 156 | 156 | ||
| 157 | void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies); | ||
| 158 | |||
| 159 | void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); | 157 | void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); |
| 160 | 158 | ||
| 161 | GLuint StorageHandle() noexcept; | 159 | GLuint StorageHandle() noexcept; |
| @@ -170,7 +168,6 @@ private: | |||
| 170 | void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); | 168 | void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); |
| 171 | 169 | ||
| 172 | OGLTexture texture; | 170 | OGLTexture texture; |
| 173 | OGLBuffer buffer; | ||
| 174 | OGLTextureView store_view; | 171 | OGLTextureView store_view; |
| 175 | GLenum gl_internal_format = GL_NONE; | 172 | GLenum gl_internal_format = GL_NONE; |
| 176 | GLenum gl_format = GL_NONE; | 173 | GLenum gl_format = GL_NONE; |
| @@ -182,12 +179,14 @@ class ImageView : public VideoCommon::ImageViewBase { | |||
| 182 | 179 | ||
| 183 | public: | 180 | public: |
| 184 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); | 181 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); |
| 182 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, | ||
| 183 | const VideoCommon::ImageViewInfo&, GPUVAddr); | ||
| 185 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | 184 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, |
| 186 | const VideoCommon::ImageViewInfo& view_info); | 185 | const VideoCommon::ImageViewInfo& view_info); |
| 187 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); | 186 | explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); |
| 188 | 187 | ||
| 189 | [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { | 188 | [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept { |
| 190 | return views[static_cast<size_t>(query_type)]; | 189 | return views[static_cast<size_t>(handle_type)]; |
| 191 | } | 190 | } |
| 192 | 191 | ||
| 193 | [[nodiscard]] GLuint DefaultHandle() const noexcept { | 192 | [[nodiscard]] GLuint DefaultHandle() const noexcept { |
| @@ -198,15 +197,25 @@ public: | |||
| 198 | return internal_format; | 197 | return internal_format; |
| 199 | } | 198 | } |
| 200 | 199 | ||
| 200 | [[nodiscard]] GPUVAddr GpuAddr() const noexcept { | ||
| 201 | return gpu_addr; | ||
| 202 | } | ||
| 203 | |||
| 204 | [[nodiscard]] u32 BufferSize() const noexcept { | ||
| 205 | return buffer_size; | ||
| 206 | } | ||
| 207 | |||
| 201 | private: | 208 | private: |
| 202 | void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, | 209 | void SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle, |
| 203 | const VideoCommon::ImageViewInfo& info, | 210 | const VideoCommon::ImageViewInfo& info, |
| 204 | VideoCommon::SubresourceRange view_range); | 211 | VideoCommon::SubresourceRange view_range); |
| 205 | 212 | ||
| 206 | std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{}; | 213 | std::array<GLuint, Shader::NUM_TEXTURE_TYPES> views{}; |
| 207 | std::vector<OGLTextureView> stored_views; | 214 | std::vector<OGLTextureView> stored_views; |
| 208 | GLuint default_handle = 0; | ||
| 209 | GLenum internal_format = GL_NONE; | 215 | GLenum internal_format = GL_NONE; |
| 216 | GLuint default_handle = 0; | ||
| 217 | GPUVAddr gpu_addr = 0; | ||
| 218 | u32 buffer_size = 0; | ||
| 210 | }; | 219 | }; |
| 211 | 220 | ||
| 212 | class ImageAlloc : public VideoCommon::ImageAllocBase {}; | 221 | class ImageAlloc : public VideoCommon::ImageAllocBase {}; |
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index f7ad8f370..672f94bfc 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h | |||
| @@ -5,12 +5,120 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <glad/glad.h> | 7 | #include <glad/glad.h> |
| 8 | |||
| 8 | #include "video_core/engines/maxwell_3d.h" | 9 | #include "video_core/engines/maxwell_3d.h" |
| 10 | #include "video_core/surface.h" | ||
| 9 | 11 | ||
| 10 | namespace OpenGL::MaxwellToGL { | 12 | namespace OpenGL::MaxwellToGL { |
| 11 | 13 | ||
| 12 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 14 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 13 | 15 | ||
| 16 | struct FormatTuple { | ||
| 17 | GLenum internal_format; | ||
| 18 | GLenum format = GL_NONE; | ||
| 19 | GLenum type = GL_NONE; | ||
| 20 | }; | ||
| 21 | |||
| 22 | constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TABLE = {{ | ||
| 23 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM | ||
| 24 | {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM | ||
| 25 | {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT | ||
| 26 | {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT | ||
| 27 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM | ||
| 28 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM | ||
| 29 | {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM | ||
| 30 | {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM | ||
| 31 | {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT | ||
| 32 | {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM | ||
| 33 | {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM | ||
| 34 | {GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM | ||
| 35 | {GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT | ||
| 36 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT | ||
| 37 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT | ||
| 38 | {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM | ||
| 39 | {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM | ||
| 40 | {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT | ||
| 41 | {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT | ||
| 42 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT | ||
| 43 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT | ||
| 44 | {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM | ||
| 45 | {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM | ||
| 46 | {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM | ||
| 47 | {GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM | ||
| 48 | {GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM | ||
| 49 | {GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM | ||
| 50 | {GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM | ||
| 51 | {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM | ||
| 52 | {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT | ||
| 53 | {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT | ||
| 54 | {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM | ||
| 55 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM | ||
| 56 | {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT | ||
| 57 | {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT | ||
| 58 | {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT | ||
| 59 | {GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT | ||
| 60 | {GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT | ||
| 61 | {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT | ||
| 62 | {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM | ||
| 63 | {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM | ||
| 64 | {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT | ||
| 65 | {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT | ||
| 66 | {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM | ||
| 67 | {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT | ||
| 68 | {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT | ||
| 69 | {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT | ||
| 70 | {GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM | ||
| 71 | {GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT | ||
| 72 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB | ||
| 73 | {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM | ||
| 74 | {GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM | ||
| 75 | {GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT | ||
| 76 | {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT | ||
| 77 | {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT | ||
| 78 | {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT | ||
| 79 | {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT | ||
| 80 | {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT | ||
| 81 | {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM | ||
| 82 | {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM | ||
| 83 | {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM | ||
| 84 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB | ||
| 85 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB | ||
| 86 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB | ||
| 87 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB | ||
| 88 | {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB | ||
| 89 | {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM | ||
| 90 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB | ||
| 91 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB | ||
| 92 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB | ||
| 93 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB | ||
| 94 | {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM | ||
| 95 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB | ||
| 96 | {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM | ||
| 97 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB | ||
| 98 | {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM | ||
| 99 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB | ||
| 100 | {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM | ||
| 101 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB | ||
| 102 | {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM | ||
| 103 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB | ||
| 104 | {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM | ||
| 105 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB | ||
| 106 | {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM | ||
| 107 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB | ||
| 108 | {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT | ||
| 109 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT | ||
| 110 | {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM | ||
| 111 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT | ||
| 112 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM | ||
| 113 | {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, | ||
| 114 | GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT | ||
| 115 | }}; | ||
| 116 | |||
| 117 | inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) { | ||
| 118 | ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size()); | ||
| 119 | return FORMAT_TABLE[static_cast<size_t>(pixel_format)]; | ||
| 120 | } | ||
| 121 | |||
| 14 | inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { | 122 | inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { |
| 15 | switch (attrib.type) { | 123 | switch (attrib.type) { |
| 16 | case Maxwell::VertexAttribute::Type::UnsignedNorm: | 124 | case Maxwell::VertexAttribute::Type::UnsignedNorm: |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index c12929de6..4e77ef808 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -130,7 +130,6 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, | |||
| 130 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) | 130 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) |
| 131 | : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, | 131 | : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, |
| 132 | emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu}, | 132 | emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu}, |
| 133 | program_manager{device}, | ||
| 134 | rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { | 133 | rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { |
| 135 | if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { | 134 | if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { |
| 136 | glEnable(GL_DEBUG_OUTPUT); | 135 | glEnable(GL_DEBUG_OUTPUT); |
| @@ -236,12 +235,7 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 236 | OGLShader fragment_shader; | 235 | OGLShader fragment_shader; |
| 237 | fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); | 236 | fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); |
| 238 | 237 | ||
| 239 | vertex_program.Create(true, false, vertex_shader.handle); | 238 | present_program.Create(false, false, vertex_shader.handle, fragment_shader.handle); |
| 240 | fragment_program.Create(true, false, fragment_shader.handle); | ||
| 241 | |||
| 242 | pipeline.Create(); | ||
| 243 | glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); | ||
| 244 | glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); | ||
| 245 | 239 | ||
| 246 | // Generate presentation sampler | 240 | // Generate presentation sampler |
| 247 | present_sampler.Create(); | 241 | present_sampler.Create(); |
| @@ -342,8 +336,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 342 | // Set projection matrix | 336 | // Set projection matrix |
| 343 | const std::array ortho_matrix = | 337 | const std::array ortho_matrix = |
| 344 | MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); | 338 | MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); |
| 345 | glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE, | 339 | program_manager.BindProgram(present_program.handle); |
| 346 | std::data(ortho_matrix)); | 340 | glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); |
| 347 | 341 | ||
| 348 | const auto& texcoords = screen_info.display_texcoords; | 342 | const auto& texcoords = screen_info.display_texcoords; |
| 349 | auto left = texcoords.left; | 343 | auto left = texcoords.left; |
| @@ -404,8 +398,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 404 | state_tracker.NotifyClipControl(); | 398 | state_tracker.NotifyClipControl(); |
| 405 | state_tracker.NotifyAlphaTest(); | 399 | state_tracker.NotifyAlphaTest(); |
| 406 | 400 | ||
| 407 | program_manager.BindHostPipeline(pipeline.handle); | ||
| 408 | |||
| 409 | state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); | 401 | state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); |
| 410 | glEnable(GL_CULL_FACE); | 402 | glEnable(GL_CULL_FACE); |
| 411 | if (screen_info.display_srgb) { | 403 | if (screen_info.display_srgb) { |
| @@ -453,7 +445,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 453 | glClear(GL_COLOR_BUFFER_BIT); | 445 | glClear(GL_COLOR_BUFFER_BIT); |
| 454 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | 446 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); |
| 455 | 447 | ||
| 456 | program_manager.RestoreGuestPipeline(); | 448 | // TODO |
| 449 | // program_manager.RestoreGuestPipeline(); | ||
| 457 | } | 450 | } |
| 458 | 451 | ||
| 459 | void RendererOpenGL::RenderScreenshot() { | 452 | void RendererOpenGL::RenderScreenshot() { |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 0b66f8332..b3ee55665 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -12,7 +12,6 @@ | |||
| 12 | #include "video_core/renderer_opengl/gl_device.h" | 12 | #include "video_core/renderer_opengl/gl_device.h" |
| 13 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 13 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 15 | #include "video_core/renderer_opengl/gl_shader_manager.h" | ||
| 16 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 15 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 17 | 16 | ||
| 18 | namespace Core { | 17 | namespace Core { |
| @@ -111,9 +110,7 @@ private: | |||
| 111 | // OpenGL object IDs | 110 | // OpenGL object IDs |
| 112 | OGLSampler present_sampler; | 111 | OGLSampler present_sampler; |
| 113 | OGLBuffer vertex_buffer; | 112 | OGLBuffer vertex_buffer; |
| 114 | OGLProgram vertex_program; | 113 | OGLProgram present_program; |
| 115 | OGLProgram fragment_program; | ||
| 116 | OGLPipeline pipeline; | ||
| 117 | OGLFramebuffer screenshot_framebuffer; | 114 | OGLFramebuffer screenshot_framebuffer; |
| 118 | 115 | ||
| 119 | // GPU address of the vertex buffer | 116 | // GPU address of the vertex buffer |
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 8fb5be393..51e72b705 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp | |||
| @@ -16,7 +16,6 @@ | |||
| 16 | #include "video_core/host_shaders/opengl_copy_bc4_comp.h" | 16 | #include "video_core/host_shaders/opengl_copy_bc4_comp.h" |
| 17 | #include "video_core/host_shaders/opengl_copy_bgra_comp.h" | 17 | #include "video_core/host_shaders/opengl_copy_bgra_comp.h" |
| 18 | #include "video_core/host_shaders/pitch_unswizzle_comp.h" | 18 | #include "video_core/host_shaders/pitch_unswizzle_comp.h" |
| 19 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 20 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 19 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 21 | #include "video_core/renderer_opengl/gl_texture_cache.h" | 20 | #include "video_core/renderer_opengl/gl_texture_cache.h" |
| 22 | #include "video_core/renderer_opengl/util_shaders.h" | 21 | #include "video_core/renderer_opengl/util_shaders.h" |
| @@ -86,7 +85,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, | |||
| 86 | .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), | 85 | .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), |
| 87 | .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), | 86 | .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), |
| 88 | }; | 87 | }; |
| 89 | program_manager.BindHostCompute(astc_decoder_program.handle); | 88 | program_manager.BindProgram(astc_decoder_program.handle); |
| 90 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | 89 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); |
| 91 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); | 90 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); |
| 92 | 91 | ||
| @@ -134,7 +133,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, | |||
| 134 | static constexpr GLuint BINDING_INPUT_BUFFER = 1; | 133 | static constexpr GLuint BINDING_INPUT_BUFFER = 1; |
| 135 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | 134 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; |
| 136 | 135 | ||
| 137 | program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); | 136 | program_manager.BindProgram(block_linear_unswizzle_2d_program.handle); |
| 138 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); | 137 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); |
| 139 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | 138 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); |
| 140 | 139 | ||
| @@ -173,7 +172,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, | |||
| 173 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | 172 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; |
| 174 | 173 | ||
| 175 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); | 174 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); |
| 176 | program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); | 175 | program_manager.BindProgram(block_linear_unswizzle_3d_program.handle); |
| 177 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | 176 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); |
| 178 | 177 | ||
| 179 | const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); | 178 | const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); |
| @@ -222,7 +221,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, | |||
| 222 | UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), | 221 | UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), |
| 223 | "Non-power of two images are not implemented"); | 222 | "Non-power of two images are not implemented"); |
| 224 | 223 | ||
| 225 | program_manager.BindHostCompute(pitch_unswizzle_program.handle); | 224 | program_manager.BindProgram(pitch_unswizzle_program.handle); |
| 226 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); | 225 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); |
| 227 | glUniform2ui(LOC_ORIGIN, 0, 0); | 226 | glUniform2ui(LOC_ORIGIN, 0, 0); |
| 228 | glUniform2i(LOC_DESTINATION, 0, 0); | 227 | glUniform2i(LOC_DESTINATION, 0, 0); |
| @@ -250,7 +249,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im | |||
| 250 | static constexpr GLuint LOC_SRC_OFFSET = 0; | 249 | static constexpr GLuint LOC_SRC_OFFSET = 0; |
| 251 | static constexpr GLuint LOC_DST_OFFSET = 1; | 250 | static constexpr GLuint LOC_DST_OFFSET = 1; |
| 252 | 251 | ||
| 253 | program_manager.BindHostCompute(copy_bc4_program.handle); | 252 | program_manager.BindProgram(copy_bc4_program.handle); |
| 254 | 253 | ||
| 255 | for (const ImageCopy& copy : copies) { | 254 | for (const ImageCopy& copy : copies) { |
| 256 | ASSERT(copy.src_subresource.base_layer == 0); | 255 | ASSERT(copy.src_subresource.base_layer == 0); |
| @@ -286,7 +285,7 @@ void UtilShaders::CopyBGR(Image& dst_image, Image& src_image, | |||
| 286 | break; | 285 | break; |
| 287 | case 4: { | 286 | case 4: { |
| 288 | // BGRA8 copy | 287 | // BGRA8 copy |
| 289 | program_manager.BindHostCompute(copy_bgra_program.handle); | 288 | program_manager.BindProgram(copy_bgra_program.handle); |
| 290 | constexpr GLenum FORMAT = GL_RGBA8; | 289 | constexpr GLenum FORMAT = GL_RGBA8; |
| 291 | for (const ImageCopy& copy : copies) { | 290 | for (const ImageCopy& copy : copies) { |
| 292 | ASSERT(copy.src_offset == zero_offset); | 291 | ASSERT(copy.src_offset == zero_offset); |
diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index dd7d2cc0c..c6e5e059b 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h | |||
| @@ -19,23 +19,6 @@ | |||
| 19 | 19 | ||
| 20 | namespace Vulkan { | 20 | namespace Vulkan { |
| 21 | 21 | ||
| 22 | struct TextureHandle { | ||
| 23 | explicit TextureHandle(u32 data, bool via_header_index) { | ||
| 24 | [[likely]] if (via_header_index) { | ||
| 25 | image = data; | ||
| 26 | sampler = data; | ||
| 27 | } | ||
| 28 | else { | ||
| 29 | const Tegra::Texture::TextureHandle handle{data}; | ||
| 30 | image = handle.tic_id; | ||
| 31 | sampler = via_header_index ? image : handle.tsc_id.Value(); | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | u32 image; | ||
| 36 | u32 sampler; | ||
| 37 | }; | ||
| 38 | |||
| 39 | class DescriptorLayoutBuilder { | 22 | class DescriptorLayoutBuilder { |
| 40 | public: | 23 | public: |
| 41 | DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {} | 24 | DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {} |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index c52001b5a..c27402ff0 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -140,8 +140,8 @@ struct BufferCacheParams { | |||
| 140 | static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false; | 140 | static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false; |
| 141 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; | 141 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; |
| 142 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; | 142 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; |
| 143 | static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = false; | ||
| 144 | static constexpr bool USE_MEMORY_MAPS = true; | 143 | static constexpr bool USE_MEMORY_MAPS = true; |
| 144 | static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; | ||
| 145 | }; | 145 | }; |
| 146 | 146 | ||
| 147 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | 147 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index feaace0c5..168ffa7e9 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | |||
| @@ -18,6 +18,9 @@ | |||
| 18 | 18 | ||
| 19 | namespace Vulkan { | 19 | namespace Vulkan { |
| 20 | 20 | ||
| 21 | using Shader::ImageBufferDescriptor; | ||
| 22 | using Tegra::Texture::TexturePair; | ||
| 23 | |||
| 21 | ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, | 24 | ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, |
| 22 | VKUpdateDescriptorQueue& update_descriptor_queue_, | 25 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 23 | Common::ThreadWorker* thread_worker, const Shader::Info& info_, | 26 | Common::ThreadWorker* thread_worker, const Shader::Info& info_, |
| @@ -106,25 +109,25 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, | |||
| 106 | secondary_offset}; | 109 | secondary_offset}; |
| 107 | const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; | 110 | const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; |
| 108 | const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; | 111 | const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; |
| 109 | return TextureHandle{lhs_raw | rhs_raw, via_header_index}; | 112 | return TexturePair(lhs_raw | rhs_raw, via_header_index); |
| 110 | } | 113 | } |
| 111 | } | 114 | } |
| 112 | return TextureHandle{gpu_memory.Read<u32>(addr), via_header_index}; | 115 | return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); |
| 113 | }}; | 116 | }}; |
| 114 | const auto add_image{[&](const auto& desc) { | 117 | const auto add_image{[&](const auto& desc) { |
| 115 | for (u32 index = 0; index < desc.count; ++index) { | 118 | for (u32 index = 0; index < desc.count; ++index) { |
| 116 | const TextureHandle handle{read_handle(desc, index)}; | 119 | const auto handle{read_handle(desc, index)}; |
| 117 | image_view_indices.push_back(handle.image); | 120 | image_view_indices.push_back(handle.first); |
| 118 | } | 121 | } |
| 119 | }}; | 122 | }}; |
| 120 | std::ranges::for_each(info.texture_buffer_descriptors, add_image); | 123 | std::ranges::for_each(info.texture_buffer_descriptors, add_image); |
| 121 | std::ranges::for_each(info.image_buffer_descriptors, add_image); | 124 | std::ranges::for_each(info.image_buffer_descriptors, add_image); |
| 122 | for (const auto& desc : info.texture_descriptors) { | 125 | for (const auto& desc : info.texture_descriptors) { |
| 123 | for (u32 index = 0; index < desc.count; ++index) { | 126 | for (u32 index = 0; index < desc.count; ++index) { |
| 124 | const TextureHandle handle{read_handle(desc, index)}; | 127 | const auto handle{read_handle(desc, index)}; |
| 125 | image_view_indices.push_back(handle.image); | 128 | image_view_indices.push_back(handle.first); |
| 126 | 129 | ||
| 127 | Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); | 130 | Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); |
| 128 | samplers.push_back(sampler->Handle()); | 131 | samplers.push_back(sampler->Handle()); |
| 129 | } | 132 | } |
| 130 | } | 133 | } |
| @@ -137,15 +140,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, | |||
| 137 | ImageId* texture_buffer_ids{image_view_ids.data()}; | 140 | ImageId* texture_buffer_ids{image_view_ids.data()}; |
| 138 | size_t index{}; | 141 | size_t index{}; |
| 139 | const auto add_buffer{[&](const auto& desc) { | 142 | const auto add_buffer{[&](const auto& desc) { |
| 143 | constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; | ||
| 140 | for (u32 i = 0; i < desc.count; ++i) { | 144 | for (u32 i = 0; i < desc.count; ++i) { |
| 141 | bool is_written{false}; | 145 | bool is_written{false}; |
| 142 | if constexpr (std::is_same_v<decltype(desc), const Shader::ImageBufferDescriptor&>) { | 146 | if constexpr (is_image) { |
| 143 | is_written = desc.is_written; | 147 | is_written = desc.is_written; |
| 144 | } | 148 | } |
| 145 | ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); | 149 | ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); |
| 146 | buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), | 150 | buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), |
| 147 | image_view.BufferSize(), image_view.format, | 151 | image_view.BufferSize(), image_view.format, |
| 148 | is_written); | 152 | is_written, is_image); |
| 149 | ++texture_buffer_ids; | 153 | ++texture_buffer_ids; |
| 150 | ++index; | 154 | ++index; |
| 151 | } | 155 | } |
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 9f5d30fe8..e5f54a84f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | |||
| @@ -19,7 +19,7 @@ | |||
| 19 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 19 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 20 | #include "video_core/vulkan_common/vulkan_device.h" | 20 | #include "video_core/vulkan_common/vulkan_device.h" |
| 21 | 21 | ||
| 22 | #ifdef _MSC_VER | 22 | #if defined(_MSC_VER) && defined(NDEBUG) |
| 23 | #define LAMBDA_FORCEINLINE [[msvc::forceinline]] | 23 | #define LAMBDA_FORCEINLINE [[msvc::forceinline]] |
| 24 | #else | 24 | #else |
| 25 | #define LAMBDA_FORCEINLINE | 25 | #define LAMBDA_FORCEINLINE |
| @@ -30,6 +30,7 @@ namespace { | |||
| 30 | using boost::container::small_vector; | 30 | using boost::container::small_vector; |
| 31 | using boost::container::static_vector; | 31 | using boost::container::static_vector; |
| 32 | using Shader::ImageBufferDescriptor; | 32 | using Shader::ImageBufferDescriptor; |
| 33 | using Tegra::Texture::TexturePair; | ||
| 33 | using VideoCore::Surface::PixelFormat; | 34 | using VideoCore::Surface::PixelFormat; |
| 34 | using VideoCore::Surface::PixelFormatFromDepthFormat; | 35 | using VideoCore::Surface::PixelFormatFromDepthFormat; |
| 35 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | 36 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; |
| @@ -289,15 +290,15 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 289 | const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; | 290 | const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; |
| 290 | const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; | 291 | const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; |
| 291 | const u32 raw{lhs_raw | rhs_raw}; | 292 | const u32 raw{lhs_raw | rhs_raw}; |
| 292 | return TextureHandle{raw, via_header_index}; | 293 | return TexturePair(raw, via_header_index); |
| 293 | } | 294 | } |
| 294 | } | 295 | } |
| 295 | return TextureHandle{gpu_memory.Read<u32>(addr), via_header_index}; | 296 | return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); |
| 296 | }}; | 297 | }}; |
| 297 | const auto add_image{[&](const auto& desc) { | 298 | const auto add_image{[&](const auto& desc) { |
| 298 | for (u32 index = 0; index < desc.count; ++index) { | 299 | for (u32 index = 0; index < desc.count; ++index) { |
| 299 | const TextureHandle handle{read_handle(desc, index)}; | 300 | const auto handle{read_handle(desc, index)}; |
| 300 | image_view_indices[image_index++] = handle.image; | 301 | image_view_indices[image_index++] = handle.first; |
| 301 | } | 302 | } |
| 302 | }}; | 303 | }}; |
| 303 | if constexpr (Spec::has_texture_buffers) { | 304 | if constexpr (Spec::has_texture_buffers) { |
| @@ -312,10 +313,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 312 | } | 313 | } |
| 313 | for (const auto& desc : info.texture_descriptors) { | 314 | for (const auto& desc : info.texture_descriptors) { |
| 314 | for (u32 index = 0; index < desc.count; ++index) { | 315 | for (u32 index = 0; index < desc.count; ++index) { |
| 315 | const TextureHandle handle{read_handle(desc, index)}; | 316 | const auto handle{read_handle(desc, index)}; |
| 316 | image_view_indices[image_index++] = handle.image; | 317 | image_view_indices[image_index++] = handle.first; |
| 317 | 318 | ||
| 318 | Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; | 319 | Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; |
| 319 | samplers[sampler_index++] = sampler->Handle(); | 320 | samplers[sampler_index++] = sampler->Handle(); |
| 320 | } | 321 | } |
| 321 | } | 322 | } |
| @@ -347,15 +348,16 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 347 | const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { | 348 | const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { |
| 348 | size_t index{}; | 349 | size_t index{}; |
| 349 | const auto add_buffer{[&](const auto& desc) { | 350 | const auto add_buffer{[&](const auto& desc) { |
| 351 | constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; | ||
| 350 | for (u32 i = 0; i < desc.count; ++i) { | 352 | for (u32 i = 0; i < desc.count; ++i) { |
| 351 | bool is_written{false}; | 353 | bool is_written{false}; |
| 352 | if constexpr (std::is_same_v<decltype(desc), const ImageBufferDescriptor&>) { | 354 | if constexpr (is_image) { |
| 353 | is_written = desc.is_written; | 355 | is_written = desc.is_written; |
| 354 | } | 356 | } |
| 355 | ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; | 357 | ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; |
| 356 | buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), | 358 | buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), |
| 357 | image_view.BufferSize(), image_view.format, | 359 | image_view.BufferSize(), image_view.format, |
| 358 | is_written); | 360 | is_written, is_image); |
| 359 | ++index; | 361 | ++index; |
| 360 | ++texture_buffer_index; | 362 | ++texture_buffer_index; |
| 361 | } | 363 | } |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 1334882b5..30b71bdbc 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -342,28 +342,15 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( | |||
| 342 | } | 342 | } |
| 343 | 343 | ||
| 344 | std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() { | 344 | std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() { |
| 345 | main_pools.ReleaseContents(); | 345 | GraphicsEnvironments environments; |
| 346 | 346 | GetGraphicsEnvironments(environments, graphics_key.unique_hashes); | |
| 347 | std::array<GraphicsEnvironment, Maxwell::MaxShaderProgram> graphics_envs; | ||
| 348 | boost::container::static_vector<Shader::Environment*, Maxwell::MaxShaderProgram> envs; | ||
| 349 | 347 | ||
| 350 | const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; | 348 | main_pools.ReleaseContents(); |
| 351 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 349 | auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; |
| 352 | if (graphics_key.unique_hashes[index] == 0) { | ||
| 353 | continue; | ||
| 354 | } | ||
| 355 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | ||
| 356 | auto& env{graphics_envs[index]}; | ||
| 357 | const u32 start_address{maxwell3d.regs.shader_config[index].offset}; | ||
| 358 | env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; | ||
| 359 | env.SetCachedSize(shader_infos[index]->size_bytes); | ||
| 360 | envs.push_back(&env); | ||
| 361 | } | ||
| 362 | auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; | ||
| 363 | if (pipeline_cache_filename.empty()) { | 350 | if (pipeline_cache_filename.empty()) { |
| 364 | return pipeline; | 351 | return pipeline; |
| 365 | } | 352 | } |
| 366 | serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(graphics_envs)] { | 353 | serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] { |
| 367 | boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> | 354 | boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> |
| 368 | env_ptrs; | 355 | env_ptrs; |
| 369 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 356 | for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 0f15ad2f7..ef14e91e7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -96,17 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { | |||
| 96 | return scissor; | 96 | return scissor; |
| 97 | } | 97 | } |
| 98 | 98 | ||
| 99 | struct TextureHandle { | ||
| 100 | constexpr TextureHandle(u32 data, bool via_header_index) { | ||
| 101 | const Tegra::Texture::TextureHandle handle{data}; | ||
| 102 | image = handle.tic_id; | ||
| 103 | sampler = via_header_index ? image : handle.tsc_id.Value(); | ||
| 104 | } | ||
| 105 | |||
| 106 | u32 image; | ||
| 107 | u32 sampler; | ||
| 108 | }; | ||
| 109 | |||
| 110 | DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, | 99 | DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, |
| 111 | bool is_indexed) { | 100 | bool is_indexed) { |
| 112 | DrawParams params{ | 101 | DrawParams params{ |
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index b8b8eace5..78bf90c48 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp | |||
| @@ -91,6 +91,23 @@ const ShaderInfo* ShaderCache::ComputeShader() { | |||
| 91 | return MakeShaderInfo(env, *cpu_shader_addr); | 91 | return MakeShaderInfo(env, *cpu_shader_addr); |
| 92 | } | 92 | } |
| 93 | 93 | ||
| 94 | void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result, | ||
| 95 | const std::array<u64, NUM_PROGRAMS>& unique_hashes) { | ||
| 96 | size_t env_index{}; | ||
| 97 | const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; | ||
| 98 | for (size_t index = 0; index < NUM_PROGRAMS; ++index) { | ||
| 99 | if (unique_hashes[index] == 0) { | ||
| 100 | continue; | ||
| 101 | } | ||
| 102 | const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)}; | ||
| 103 | auto& env{result.envs[index]}; | ||
| 104 | const u32 start_address{maxwell3d.regs.shader_config[index].offset}; | ||
| 105 | env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; | ||
| 106 | env.SetCachedSize(shader_infos[index]->size_bytes); | ||
| 107 | result.env_ptrs[env_index++] = &env; | ||
| 108 | } | ||
| 109 | } | ||
| 110 | |||
| 94 | ShaderInfo* ShaderCache::TryGet(VAddr addr) const { | 111 | ShaderInfo* ShaderCache::TryGet(VAddr addr) const { |
| 95 | std::scoped_lock lock{lookup_mutex}; | 112 | std::scoped_lock lock{lookup_mutex}; |
| 96 | 113 | ||
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index 89a4bcc84..136fe294c 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h | |||
| @@ -4,14 +4,18 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <algorithm> | ||
| 8 | #include <array> | ||
| 7 | #include <memory> | 9 | #include <memory> |
| 8 | #include <mutex> | 10 | #include <mutex> |
| 11 | #include <span> | ||
| 9 | #include <unordered_map> | 12 | #include <unordered_map> |
| 10 | #include <utility> | 13 | #include <utility> |
| 11 | #include <vector> | 14 | #include <vector> |
| 12 | 15 | ||
| 13 | #include "common/common_types.h" | 16 | #include "common/common_types.h" |
| 14 | #include "video_core/rasterizer_interface.h" | 17 | #include "video_core/rasterizer_interface.h" |
| 18 | #include "video_core/shader_environment.h" | ||
| 15 | 19 | ||
| 16 | namespace Tegra { | 20 | namespace Tegra { |
| 17 | class MemoryManager; | 21 | class MemoryManager; |
| @@ -30,6 +34,8 @@ class ShaderCache { | |||
| 30 | static constexpr u64 PAGE_BITS = 14; | 34 | static constexpr u64 PAGE_BITS = 14; |
| 31 | static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; | 35 | static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; |
| 32 | 36 | ||
| 37 | static constexpr size_t NUM_PROGRAMS = 6; | ||
| 38 | |||
| 33 | struct Entry { | 39 | struct Entry { |
| 34 | VAddr addr_start; | 40 | VAddr addr_start; |
| 35 | VAddr addr_end; | 41 | VAddr addr_end; |
| @@ -58,6 +64,15 @@ public: | |||
| 58 | void SyncGuestHost(); | 64 | void SyncGuestHost(); |
| 59 | 65 | ||
| 60 | protected: | 66 | protected: |
| 67 | struct GraphicsEnvironments { | ||
| 68 | std::array<GraphicsEnvironment, NUM_PROGRAMS> envs; | ||
| 69 | std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs; | ||
| 70 | |||
| 71 | std::span<Shader::Environment* const> Span() const noexcept { | ||
| 72 | return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr)); | ||
| 73 | } | ||
| 74 | }; | ||
| 75 | |||
| 61 | explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, | 76 | explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, |
| 62 | Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, | 77 | Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, |
| 63 | Tegra::Engines::KeplerCompute& kepler_compute_); | 78 | Tegra::Engines::KeplerCompute& kepler_compute_); |
| @@ -65,17 +80,21 @@ protected: | |||
| 65 | /// @brief Update the hashes and information of shader stages | 80 | /// @brief Update the hashes and information of shader stages |
| 66 | /// @param unique_hashes Shader hashes to store into when a stage is enabled | 81 | /// @param unique_hashes Shader hashes to store into when a stage is enabled |
| 67 | /// @return True no success, false on error | 82 | /// @return True no success, false on error |
| 68 | bool RefreshStages(std::array<u64, 6>& unique_hashes); | 83 | bool RefreshStages(std::array<u64, NUM_PROGRAMS>& unique_hashes); |
| 69 | 84 | ||
| 70 | /// @brief Returns information about the current compute shader | 85 | /// @brief Returns information about the current compute shader |
| 71 | /// @return Pointer to a valid shader, nullptr on error | 86 | /// @return Pointer to a valid shader, nullptr on error |
| 72 | const ShaderInfo* ComputeShader(); | 87 | const ShaderInfo* ComputeShader(); |
| 73 | 88 | ||
| 89 | /// @brief Collect the current graphics environments | ||
| 90 | void GetGraphicsEnvironments(GraphicsEnvironments& result, | ||
| 91 | const std::array<u64, NUM_PROGRAMS>& unique_hashes); | ||
| 92 | |||
| 74 | Tegra::MemoryManager& gpu_memory; | 93 | Tegra::MemoryManager& gpu_memory; |
| 75 | Tegra::Engines::Maxwell3D& maxwell3d; | 94 | Tegra::Engines::Maxwell3D& maxwell3d; |
| 76 | Tegra::Engines::KeplerCompute& kepler_compute; | 95 | Tegra::Engines::KeplerCompute& kepler_compute; |
| 77 | 96 | ||
| 78 | std::array<const ShaderInfo*, 6> shader_infos{}; | 97 | std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{}; |
| 79 | bool last_shaders_valid = false; | 98 | bool last_shaders_valid = false; |
| 80 | 99 | ||
| 81 | private: | 100 | private: |
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 5dccc0097..c93174519 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp | |||
| @@ -187,8 +187,8 @@ std::optional<u64> GenericEnvironment::TryFindSize() { | |||
| 187 | 187 | ||
| 188 | Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, | 188 | Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, |
| 189 | bool via_header_index, u32 raw) { | 189 | bool via_header_index, u32 raw) { |
| 190 | const TextureHandle handle{raw, via_header_index}; | 190 | const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)}; |
| 191 | const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; | 191 | const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)}; |
| 192 | Tegra::Texture::TICEntry entry; | 192 | Tegra::Texture::TICEntry entry; |
| 193 | gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); | 193 | gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); |
| 194 | const Shader::TextureType result{ConvertType(entry)}; | 194 | const Shader::TextureType result{ConvertType(entry)}; |
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h index 37d712045..d26dbfaab 100644 --- a/src/video_core/shader_environment.h +++ b/src/video_core/shader_environment.h | |||
| @@ -29,22 +29,6 @@ class Memorymanager; | |||
| 29 | 29 | ||
| 30 | namespace VideoCommon { | 30 | namespace VideoCommon { |
| 31 | 31 | ||
| 32 | struct TextureHandle { | ||
| 33 | explicit TextureHandle(u32 data, bool via_header_index) { | ||
| 34 | if (via_header_index) { | ||
| 35 | image = data; | ||
| 36 | sampler = data; | ||
| 37 | } else { | ||
| 38 | const Tegra::Texture::TextureHandle handle{data}; | ||
| 39 | image = handle.tic_id; | ||
| 40 | sampler = via_header_index ? image : handle.tsc_id.Value(); | ||
| 41 | } | ||
| 42 | } | ||
| 43 | |||
| 44 | u32 image; | ||
| 45 | u32 sampler; | ||
| 46 | }; | ||
| 47 | |||
| 48 | class GenericEnvironment : public Shader::Environment { | 32 | class GenericEnvironment : public Shader::Environment { |
| 49 | public: | 33 | public: |
| 50 | explicit GenericEnvironment() = default; | 34 | explicit GenericEnvironment() = default; |
diff --git a/src/video_core/texture_cache/formatter.cpp b/src/video_core/texture_cache/formatter.cpp index d10ba4ccd..249cc4d0f 100644 --- a/src/video_core/texture_cache/formatter.cpp +++ b/src/video_core/texture_cache/formatter.cpp | |||
| @@ -43,7 +43,7 @@ std::string Name(const ImageBase& image) { | |||
| 43 | return "Invalid"; | 43 | return "Invalid"; |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) { | 46 | std::string Name(const ImageViewBase& image_view) { |
| 47 | const u32 width = image_view.size.width; | 47 | const u32 width = image_view.size.width; |
| 48 | const u32 height = image_view.size.height; | 48 | const u32 height = image_view.size.height; |
| 49 | const u32 depth = image_view.size.depth; | 49 | const u32 depth = image_view.size.depth; |
| @@ -51,7 +51,7 @@ std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> t | |||
| 51 | const u32 num_layers = image_view.range.extent.layers; | 51 | const u32 num_layers = image_view.range.extent.layers; |
| 52 | 52 | ||
| 53 | const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : ""; | 53 | const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : ""; |
| 54 | switch (type.value_or(image_view.type)) { | 54 | switch (image_view.type) { |
| 55 | case ImageViewType::e1D: | 55 | case ImageViewType::e1D: |
| 56 | return fmt::format("ImageView 1D {}{}", width, level); | 56 | return fmt::format("ImageView 1D {}{}", width, level); |
| 57 | case ImageViewType::e2D: | 57 | case ImageViewType::e2D: |
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index a48413983..c6cf0583f 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h | |||
| @@ -255,8 +255,7 @@ struct RenderTargets; | |||
| 255 | 255 | ||
| 256 | [[nodiscard]] std::string Name(const ImageBase& image); | 256 | [[nodiscard]] std::string Name(const ImageBase& image); |
| 257 | 257 | ||
| 258 | [[nodiscard]] std::string Name(const ImageViewBase& image_view, | 258 | [[nodiscard]] std::string Name(const ImageViewBase& image_view); |
| 259 | std::optional<ImageViewType> type = std::nullopt); | ||
| 260 | 259 | ||
| 261 | [[nodiscard]] std::string Name(const RenderTargets& render_targets); | 260 | [[nodiscard]] std::string Name(const RenderTargets& render_targets); |
| 262 | 261 | ||
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index c1d14335e..1a9399455 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h | |||
| @@ -154,6 +154,15 @@ union TextureHandle { | |||
| 154 | }; | 154 | }; |
| 155 | static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); | 155 | static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); |
| 156 | 156 | ||
| 157 | [[nodiscard]] inline std::pair<u32, u32> TexturePair(u32 raw, bool via_header_index) { | ||
| 158 | if (via_header_index) { | ||
| 159 | return {raw, raw}; | ||
| 160 | } else { | ||
| 161 | const Tegra::Texture::TextureHandle handle{raw}; | ||
| 162 | return {handle.tic_id, via_header_index ? handle.tic_id : handle.tsc_id}; | ||
| 163 | } | ||
| 164 | } | ||
| 165 | |||
| 157 | struct TICEntry { | 166 | struct TICEntry { |
| 158 | union { | 167 | union { |
| 159 | struct { | 168 | struct { |
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 2318c1bda..e27a2b51e 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -282,7 +282,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 282 | VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ | 282 | VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ |
| 283 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, | 283 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, |
| 284 | .pNext = nullptr, | 284 | .pNext = nullptr, |
| 285 | .storageBuffer16BitAccess = false, | 285 | .storageBuffer16BitAccess = true, |
| 286 | .uniformAndStorageBuffer16BitAccess = true, | 286 | .uniformAndStorageBuffer16BitAccess = true, |
| 287 | .storagePushConstant16 = false, | 287 | .storagePushConstant16 = false, |
| 288 | .storageInputOutput16 = false, | 288 | .storageInputOutput16 = false, |