-rw-r--r--  CMakeModules/GenerateSCMRev.cmake | 2
-rw-r--r--  src/common/CMakeLists.txt | 2
-rw-r--r--  src/common/page_table.cpp | 12
-rw-r--r--  src/common/page_table.h | 15
-rw-r--r--  src/video_core/CMakeLists.txt | 2
-rw-r--r--  src/video_core/engines/maxwell_3d.h | 15
-rw-r--r--  src/video_core/engines/shader_bytecode.h | 11
-rw-r--r--  src/video_core/gpu.h | 1
-rw-r--r--  src/video_core/memory_manager.h | 2
-rw-r--r--  src/video_core/morton.cpp | 2
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp | 70
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h | 14
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 153
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.cpp | 1
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.cpp | 288
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 7
-rw-r--r--  src/video_core/renderer_vulkan/vk_device.cpp | 48
-rw-r--r--  src/video_core/renderer_vulkan/vk_device.h | 45
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 22
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.h | 10
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp | 130
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h | 5
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 138
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.h | 13
-rw-r--r--  src/video_core/shader/decode/bfe.cpp | 69
-rw-r--r--  src/video_core/shader/node_helper.cpp | 2
-rw-r--r--  src/video_core/shader/transform_feedback.cpp | 115
-rw-r--r--  src/video_core/shader/transform_feedback.h | 23
-rw-r--r--  src/video_core/surface.cpp | 2
-rw-r--r--  src/video_core/surface.h | 142
-rw-r--r--  src/video_core/texture_cache/format_lookup_table.cpp | 3
-rw-r--r--  src/video_core/texture_cache/surface_params.cpp | 6
-rw-r--r--  src/video_core/texture_cache/texture_cache.h | 41
33 files changed, 1020 insertions, 391 deletions
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index 8c13a94fb..83e4e9df2 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -102,6 +102,8 @@ set(HASH_FILES
102 "${VIDEO_CORE}/shader/shader_ir.cpp" 102 "${VIDEO_CORE}/shader/shader_ir.cpp"
103 "${VIDEO_CORE}/shader/shader_ir.h" 103 "${VIDEO_CORE}/shader/shader_ir.h"
104 "${VIDEO_CORE}/shader/track.cpp" 104 "${VIDEO_CORE}/shader/track.cpp"
105 "${VIDEO_CORE}/shader/transform_feedback.cpp"
106 "${VIDEO_CORE}/shader/transform_feedback.h"
105) 107)
106set(COMBINED "") 108set(COMBINED "")
107foreach (F IN LISTS HASH_FILES) 109foreach (F IN LISTS HASH_FILES)
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 1f621fb1f..fbebed715 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -83,6 +83,8 @@ add_custom_command(OUTPUT scm_rev.cpp
83 "${VIDEO_CORE}/shader/shader_ir.cpp" 83 "${VIDEO_CORE}/shader/shader_ir.cpp"
84 "${VIDEO_CORE}/shader/shader_ir.h" 84 "${VIDEO_CORE}/shader/shader_ir.h"
85 "${VIDEO_CORE}/shader/track.cpp" 85 "${VIDEO_CORE}/shader/track.cpp"
86 "${VIDEO_CORE}/shader/transform_feedback.cpp"
87 "${VIDEO_CORE}/shader/transform_feedback.h"
86 # and also check that the scm_rev files haven't changed 88 # and also check that the scm_rev files haven't changed
87 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" 89 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
88 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" 90 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"
diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp
index 69b7abc54..566b57b62 100644
--- a/src/common/page_table.cpp
+++ b/src/common/page_table.cpp
@@ -16,7 +16,6 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) {
 
     pointers.resize(num_page_table_entries);
     attributes.resize(num_page_table_entries);
-    backing_addr.resize(num_page_table_entries);
 
     // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
     // vector size is subsequently decreased (via resize), the vector might not automatically
@@ -25,6 +24,17 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) {
 
     pointers.shrink_to_fit();
     attributes.shrink_to_fit();
+}
+
+BackingPageTable::BackingPageTable(std::size_t page_size_in_bits) : PageTable{page_size_in_bits} {}
+
+BackingPageTable::~BackingPageTable() = default;
+
+void BackingPageTable::Resize(std::size_t address_space_width_in_bits) {
+    PageTable::Resize(address_space_width_in_bits);
+    const std::size_t num_page_table_entries = 1ULL
+                                               << (address_space_width_in_bits - page_size_in_bits);
+    backing_addr.resize(num_page_table_entries);
     backing_addr.shrink_to_fit();
 }
 
diff --git a/src/common/page_table.h b/src/common/page_table.h
index 8b8ff0bb8..dbc272ab7 100644
--- a/src/common/page_table.h
+++ b/src/common/page_table.h
@@ -76,9 +76,20 @@ struct PageTable {
      */
     std::vector<PageType> attributes;
 
-    std::vector<u64> backing_addr;
-
     const std::size_t page_size_in_bits{};
 };
 
+/**
+ * A more advanced Page Table with the ability to save a backing address when using it
+ * depends on another MMU.
+ */
+struct BackingPageTable : PageTable {
+    explicit BackingPageTable(std::size_t page_size_in_bits);
+    ~BackingPageTable();
+
+    void Resize(std::size_t address_space_width_in_bits);
+
+    std::vector<u64> backing_addr;
+};
+
 } // namespace Common
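
The new BackingPageTable keeps the per-page backing addresses out of the base PageTable, so only a page table that sits on top of another MMU (the GPU memory manager) pays for the extra vector. Below is a self-contained sketch of the entry-count arithmetic behind Resize(), using hypothetical sizes rather than the emulator's real constants:

    // Illustration only: how many entries Resize() allocates for a hypothetical
    // 34-bit address space with 16 KiB (2^14) pages. pointers, attributes and,
    // in BackingPageTable, backing_addr all end up with this many elements.
    #include <cstddef>
    #include <cstdio>

    int main() {
        const std::size_t page_size_in_bits = 14;
        const std::size_t address_space_width_in_bits = 34;
        const std::size_t num_page_table_entries =
            static_cast<std::size_t>(1) << (address_space_width_in_bits - page_size_in_bits);
        std::printf("%zu entries\n", num_page_table_entries); // 1048576
        return 0;
    }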
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 0101e5f0e..91df062d7 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -129,6 +129,8 @@ add_library(video_core STATIC
     shader/shader_ir.cpp
     shader/shader_ir.h
     shader/track.cpp
+    shader/transform_feedback.cpp
+    shader/transform_feedback.h
     surface.cpp
     surface.h
     texture_cache/format_lookup_table.cpp
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 8752a1cfb..8a9e9992e 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -628,19 +628,26 @@ public:
             float depth_range_far;
         };
 
-        struct alignas(32) TransformFeedbackBinding {
+        struct TransformFeedbackBinding {
             u32 buffer_enable;
             u32 address_high;
             u32 address_low;
             s32 buffer_size;
             s32 buffer_offset;
+            INSERT_UNION_PADDING_WORDS(3);
+
+            GPUVAddr Address() const {
+                return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                             address_low);
+            }
         };
         static_assert(sizeof(TransformFeedbackBinding) == 32);
 
-        struct alignas(16) TransformFeedbackLayout {
+        struct TransformFeedbackLayout {
             u32 stream;
             u32 varying_count;
             u32 stride;
+            INSERT_UNION_PADDING_WORDS(1);
         };
         static_assert(sizeof(TransformFeedbackLayout) == 16);
 
@@ -652,6 +659,10 @@ public:
             return shader_config[index].enable != 0;
         }
 
+        bool IsShaderConfigEnabled(Regs::ShaderProgram type) const {
+            return IsShaderConfigEnabled(static_cast<std::size_t>(type));
+        }
+
         union {
             struct {
                 INSERT_UNION_PADDING_WORDS(0x45);
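
The new Address() helper on TransformFeedbackBinding folds the two 32-bit registers back into one 64-bit GPU virtual address. A standalone sketch of the same computation, with illustrative values rather than real register contents:

    #include <cstdint>
    #include <cstdio>

    using GPUVAddr = std::uint64_t;

    // Same recombination the header performs: high word shifted up, low word OR'd in.
    GPUVAddr MakeGpuAddress(std::uint32_t address_high, std::uint32_t address_low) {
        return (static_cast<GPUVAddr>(address_high) << 32) | address_low;
    }

    int main() {
        std::printf("0x%016llx\n",
                    static_cast<unsigned long long>(MakeGpuAddress(0x00000001u, 0x20000000u)));
        // prints 0x0000000120000000
        return 0;
    }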
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c9bc83cd7..eba42deb4 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -911,14 +911,9 @@ union Instruction {
     } fadd32i;
 
     union {
-        BitField<20, 8, u64> shift_position;
-        BitField<28, 8, u64> shift_length;
-        BitField<48, 1, u64> negate_b;
-        BitField<49, 1, u64> negate_a;
-
-        u64 GetLeftShiftValue() const {
-            return 32 - (shift_position + shift_length);
-        }
+        BitField<40, 1, u64> brev;
+        BitField<47, 1, u64> rd_cc;
+        BitField<48, 1, u64> is_signed;
     } bfe;
 
     union {
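
The BFE field now only carries flags (bit reverse, CC write, signedness); position and width come from the instruction's operand instead of dedicated bitfields. A hedged sketch of the usual bitfield-extract semantics these flags select, written as plain C++ rather than the decoder in shader/decode/bfe.cpp:

    #include <cstdint>
    #include <cstdio>

    // Reverses the bit order of a 32-bit value (what `brev` requests).
    std::uint32_t BitReverse32(std::uint32_t value) {
        std::uint32_t result = 0;
        for (int i = 0; i < 32; ++i) {
            result = (result << 1) | ((value >> i) & 1u);
        }
        return result;
    }

    // Extracts `count` bits starting at `position`, optionally bit-reversing the
    // source first and sign-extending the field when `is_signed` is set.
    std::int32_t BitfieldExtract(std::uint32_t src, unsigned position, unsigned count,
                                 bool is_signed, bool brev) {
        if (brev) {
            src = BitReverse32(src);
        }
        if (count == 0) {
            return 0;
        }
        const std::uint32_t mask = count >= 32 ? 0xFFFFFFFFu : ((1u << count) - 1u);
        std::uint32_t field = (src >> position) & mask;
        if (is_signed && ((field >> (count - 1)) & 1u) != 0) {
            field |= ~mask; // sign-extend the extracted field
        }
        return static_cast<std::int32_t>(field);
    }

    int main() {
        // Extract bits [4..7] of 0xF0 as a signed field: 0b1111 sign-extends to -1.
        std::printf("%d\n", BitfieldExtract(0xF0u, 4, 4, true, false));
        return 0;
    }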
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index ba8c9d665..64acb17df 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -39,6 +39,7 @@ enum class RenderTargetFormat : u32 {
     RGBA32_FLOAT = 0xC0,
     RGBA32_UINT = 0xC2,
     RGBA16_UNORM = 0xC6,
+    RGBA16_SNORM = 0xC7,
     RGBA16_UINT = 0xC9,
     RGBA16_FLOAT = 0xCA,
     RG32_FLOAT = 0xCB,
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index aea010087..073bdb491 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -174,7 +174,7 @@ private:
     /// End of address space, based on address space in bits.
     static constexpr GPUVAddr address_space_end{1ULL << address_space_width};
 
-    Common::PageTable page_table{page_bits};
+    Common::BackingPageTable page_table{page_bits};
     VMAMap vma_map;
     VideoCore::RasterizerInterface& rasterizer;
 
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index f2c83266e..6d522c318 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -51,6 +51,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
     MortonCopy<true, PixelFormat::R8UI>,
     MortonCopy<true, PixelFormat::RGBA16F>,
     MortonCopy<true, PixelFormat::RGBA16U>,
+    MortonCopy<true, PixelFormat::RGBA16S>,
     MortonCopy<true, PixelFormat::RGBA16UI>,
     MortonCopy<true, PixelFormat::R11FG11FB10F>,
     MortonCopy<true, PixelFormat::RGBA32UI>,
@@ -131,6 +132,7 @@ static constexpr ConversionArray linear_to_morton_fns = {
     MortonCopy<false, PixelFormat::R8U>,
     MortonCopy<false, PixelFormat::R8UI>,
     MortonCopy<false, PixelFormat::RGBA16F>,
+    MortonCopy<false, PixelFormat::RGBA16S>,
     MortonCopy<false, PixelFormat::RGBA16U>,
     MortonCopy<false, PixelFormat::RGBA16UI>,
     MortonCopy<false, PixelFormat::R11FG11FB10F>,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 8a2db8e36..1af4268a4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -496,7 +496,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
     SyncCullMode();
     SyncPrimitiveRestart();
     SyncScissorTest();
-    SyncTransformFeedback();
     SyncPointState();
     SyncPolygonOffset();
     SyncAlphaTest();
@@ -569,7 +568,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
         glTextureBarrier();
     }
 
-    ++num_queued_commands;
+    BeginTransformFeedback(primitive_mode);
 
     const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
     const GLsizei num_instances =
@@ -608,6 +607,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
                                               num_instances, base_instance);
         }
     }
+
+    EndTransformFeedback();
+
+    ++num_queued_commands;
 }
 
 void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -1290,11 +1293,6 @@ void RasterizerOpenGL::SyncScissorTest() {
     }
 }
 
-void RasterizerOpenGL::SyncTransformFeedback() {
-    const auto& regs = system.GPU().Maxwell3D().regs;
-    UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
-}
-
 void RasterizerOpenGL::SyncPointState() {
     auto& gpu = system.GPU().Maxwell3D();
     auto& flags = gpu.dirty.flags;
@@ -1370,4 +1368,62 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
     oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
 }
 
+void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
+    const auto& regs = system.GPU().Maxwell3D().regs;
+    if (regs.tfb_enabled == 0) {
+        return;
+    }
+
+    UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
+                     regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
+                     regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
+
+    for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
+        const auto& binding = regs.tfb_bindings[index];
+        if (!binding.buffer_enable) {
+            if (enabled_transform_feedback_buffers[index]) {
+                glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
+                                  0);
+            }
+            enabled_transform_feedback_buffers[index] = false;
+            continue;
+        }
+        enabled_transform_feedback_buffers[index] = true;
+
+        auto& tfb_buffer = transform_feedback_buffers[index];
+        tfb_buffer.Create();
+
+        const GLuint handle = tfb_buffer.handle;
+        const std::size_t size = binding.buffer_size;
+        glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
+        glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
+                          static_cast<GLsizeiptr>(size));
+    }
+
+    glBeginTransformFeedback(GL_POINTS);
+}
+
+void RasterizerOpenGL::EndTransformFeedback() {
+    const auto& regs = system.GPU().Maxwell3D().regs;
+    if (regs.tfb_enabled == 0) {
+        return;
+    }
+
+    glEndTransformFeedback();
+
+    for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
+        const auto& binding = regs.tfb_bindings[index];
+        if (!binding.buffer_enable) {
+            continue;
+        }
+        UNIMPLEMENTED_IF(binding.buffer_offset != 0);
+
+        const GLuint handle = transform_feedback_buffers[index].handle;
+        const GPUVAddr gpu_addr = binding.Address();
+        const std::size_t size = binding.buffer_size;
+        const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
+        glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
+    }
+}
+
 } // namespace OpenGL
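
For reference, the bare OpenGL call order that the new Begin/EndTransformFeedback pair relies on looks roughly like the sketch below; a current GL 4.5 context and a program with transform feedback varyings are assumed, and the buffer sizes are made up:

    #include <glad/glad.h>

    // Minimal capture sequence (sketch): bind the capture buffer, begin capture,
    // draw, end capture, then read the captured bytes back.
    void CaptureOnce(GLuint program, GLuint vao, GLsizei vertex_count, void* out, GLsizeiptr size) {
        GLuint buffer = 0;
        glCreateBuffers(1, &buffer);
        glNamedBufferData(buffer, size, nullptr, GL_STREAM_COPY);
        glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, buffer, 0, size);

        glUseProgram(program);
        glBindVertexArray(vao);
        glBeginTransformFeedback(GL_POINTS);
        glDrawArrays(GL_POINTS, 0, vertex_count);
        glEndTransformFeedback();

        glGetNamedBufferSubData(buffer, 0, size, out); // read back the captured data
        glDeleteBuffers(1, &buffer);
    }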
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index e6424f5d2..2d3be2437 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -168,9 +168,6 @@ private:
     /// Syncs the scissor test state to match the guest state
     void SyncScissorTest();
 
-    /// Syncs the transform feedback state to match the guest state
-    void SyncTransformFeedback();
-
     /// Syncs the point state to match the guest state
     void SyncPointState();
 
@@ -192,6 +189,12 @@ private:
     /// Syncs the framebuffer sRGB state to match the guest state
     void SyncFramebufferSRGB();
 
+    /// Begin a transform feedback
+    void BeginTransformFeedback(GLenum primitive_mode);
+
+    /// End a transform feedback
+    void EndTransformFeedback();
+
     /// Check for extension that are not strictly required but are needed for correct emulation
     void CheckExtensions();
 
@@ -229,6 +232,11 @@ private:
     BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
     BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
 
+    std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
+        transform_feedback_buffers;
+    std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
+        enabled_transform_feedback_buffers;
+
     /// Number of commands queued to the OpenGL driver. Reseted on flush.
     std::size_t num_queued_commands = 0;
 
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 19d6f3dcb..849839fe3 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -23,6 +23,7 @@
 #include "video_core/shader/ast.h"
 #include "video_core/shader/node.h"
 #include "video_core/shader/shader_ir.h"
+#include "video_core/shader/transform_feedback.h"
 
 namespace OpenGL {
 
@@ -36,6 +37,7 @@ using Tegra::Shader::IpaInterpMode;
 using Tegra::Shader::IpaMode;
 using Tegra::Shader::IpaSampleMode;
 using Tegra::Shader::Register;
+using VideoCommon::Shader::BuildTransformFeedback;
 using VideoCommon::Shader::Registry;
 
 using namespace std::string_literals;
@@ -49,6 +51,11 @@ class ExprDecompiler;
 
 enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
 
+constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
+
+constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
+constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
+
 struct TextureOffset {};
 struct TextureDerivates {};
 using TextureArgument = std::pair<Type, Node>;
@@ -390,12 +397,22 @@ std::string FlowStackTopName(MetaStackClass stack) {
     return stage == ShaderType::Vertex;
 }
 
+struct GenericVaryingDescription {
+    std::string name;
+    u8 first_element = 0;
+    bool is_scalar = false;
+};
+
 class GLSLDecompiler final {
 public:
     explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
                             ShaderType stage, std::string_view identifier, std::string_view suffix)
         : device{device}, ir{ir}, registry{registry}, stage{stage},
-          identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {}
+          identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {
+        if (stage != ShaderType::Compute) {
+            transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
+        }
+    }
 
     void Decompile() {
         DeclareHeader();
@@ -403,17 +420,17 @@ public:
         DeclareGeometry();
         DeclareFragment();
         DeclareCompute();
-        DeclareRegisters();
-        DeclareCustomVariables();
-        DeclarePredicates();
-        DeclareLocalMemory();
-        DeclareInternalFlags();
         DeclareInputAttributes();
         DeclareOutputAttributes();
-        DeclareConstantBuffers();
-        DeclareGlobalMemory();
-        DeclareSamplers();
         DeclareImages();
+        DeclareSamplers();
+        DeclareGlobalMemory();
+        DeclareConstantBuffers();
+        DeclareLocalMemory();
+        DeclareRegisters();
+        DeclarePredicates();
+        DeclareInternalFlags();
+        DeclareCustomVariables();
         DeclarePhysicalAttributeReader();
 
         code.AddLine("void main() {{");
@@ -485,7 +502,7 @@ private:
         if (!identifier.empty()) {
             code.AddLine("// {}", identifier);
         }
-        code.AddLine("#version 430 core");
+        code.AddLine("#version 440 core");
         code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
         if (device.HasShaderBallot()) {
             code.AddLine("#extension GL_ARB_shader_ballot : require");
@@ -570,7 +587,13 @@ private:
         code.AddLine("out gl_PerVertex {{");
         ++code.scope;
 
-        code.AddLine("vec4 gl_Position;");
+        auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position);
+        if (!pos_xfb.empty()) {
+            pos_xfb = fmt::format("layout ({}) ", pos_xfb);
+        }
+        const char* pos_type =
+            FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1);
+        code.AddLine("{}{} gl_Position;", pos_xfb, pos_type);
 
         for (const auto attribute : ir.GetOutputAttributes()) {
             if (attribute == Attribute::Index::ClipDistances0123 ||
@@ -703,7 +726,7 @@ private:
     void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
         const u32 location{GetGenericAttributeIndex(index)};
 
-        std::string name{GetInputAttribute(index)};
+        std::string name{GetGenericInputAttribute(index)};
         if (stage == ShaderType::Geometry) {
             name = "gs_" + name + "[]";
         }
@@ -740,9 +763,59 @@ private:
         }
     }
 
+    std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const {
+        const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
+        const auto it = transform_feedback.find(location);
+        if (it == transform_feedback.end()) {
+            return {};
+        }
+        return it->second.components;
+    }
+
+    std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const {
+        const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
+        const auto it = transform_feedback.find(location);
+        if (it == transform_feedback.end()) {
+            return {};
+        }
+
+        const VaryingTFB& tfb = it->second;
+        return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer,
+                           tfb.offset, tfb.stride);
+    }
+
     void DeclareOutputAttribute(Attribute::Index index) {
-        const u32 location{GetGenericAttributeIndex(index)};
-        code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index));
+        static constexpr std::string_view swizzle = "xyzw";
+        u8 element = 0;
+        while (element < 4) {
+            auto xfb = GetTransformFeedbackDecoration(index, element);
+            if (!xfb.empty()) {
+                xfb = fmt::format(", {}", xfb);
+            }
+            const std::size_t remainder = 4 - element;
+            const std::size_t num_components = GetNumComponents(index, element).value_or(remainder);
+            const char* const type = FLOAT_TYPES.at(num_components - 1);
+
+            const u32 location = GetGenericAttributeIndex(index);
+
+            GenericVaryingDescription description;
+            description.first_element = static_cast<u8>(element);
+            description.is_scalar = num_components == 1;
+            description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME);
+            if (element != 0 || num_components != 4) {
+                const std::string_view name_swizzle = swizzle.substr(element, num_components);
+                description.name = fmt::format("{}_{}", description.name, name_swizzle);
+            }
+            for (std::size_t i = 0; i < num_components; ++i) {
+                const u8 offset = static_cast<u8>(location * 4 + element + i);
+                varying_description.insert({offset, description});
+            }
+
+            code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element,
+                         xfb, type, description.name);
+
+            element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
+        }
     }
 
     void DeclareConstantBuffers() {
@@ -1095,7 +1168,7 @@ private:
             return {"0", Type::Int};
         default:
             if (IsGenericAttribute(attribute)) {
-                return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element),
+                return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
                         Type::Float};
             }
             break;
@@ -1164,8 +1237,7 @@ private:
             return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}};
         default:
             if (IsGenericAttribute(attribute)) {
-                return {
-                    {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}};
+                return {{GetGenericOutputAttribute(attribute, abuf->GetElement()), Type::Float}};
             }
             UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
             return {};
@@ -1937,16 +2009,19 @@ private:
         expr += GetSampler(meta->sampler);
         expr += ", ";
 
-        expr += constructors.at(operation.GetOperandsCount() - 1);
+        expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1);
         expr += '(';
         for (std::size_t i = 0; i < count; ++i) {
-            expr += VisitOperand(operation, i).AsInt();
-            const std::size_t next = i + 1;
-            if (next == count)
-                expr += ')';
-            else if (next < count)
+            if (i > 0) {
                 expr += ", ";
+            }
+            expr += VisitOperand(operation, i).AsInt();
+        }
+        if (meta->array) {
+            expr += ", ";
+            expr += Visit(meta->array).AsInt();
         }
+        expr += ')';
 
         if (meta->lod && !meta->sampler.IsBuffer()) {
             expr += ", ";
@@ -2376,27 +2451,34 @@ private:
     static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
 
     std::string GetRegister(u32 index) const {
-        return GetDeclarationWithSuffix(index, "gpr");
+        return AppendSuffix(index, "gpr");
     }
 
     std::string GetCustomVariable(u32 index) const {
-        return GetDeclarationWithSuffix(index, "custom_var");
+        return AppendSuffix(index, "custom_var");
     }
 
     std::string GetPredicate(Tegra::Shader::Pred pred) const {
-        return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred");
+        return AppendSuffix(static_cast<u32>(pred), "pred");
     }
 
-    std::string GetInputAttribute(Attribute::Index attribute) const {
-        return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr");
+    std::string GetGenericInputAttribute(Attribute::Index attribute) const {
+        return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
     }
 
-    std::string GetOutputAttribute(Attribute::Index attribute) const {
-        return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr");
+    std::unordered_map<u8, GenericVaryingDescription> varying_description;
+
+    std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
+        const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
+        const auto& description = varying_description.at(offset);
+        if (description.is_scalar) {
+            return description.name;
+        }
+        return fmt::format("{}[{}]", description.name, element - description.first_element);
     }
 
     std::string GetConstBuffer(u32 index) const {
-        return GetDeclarationWithSuffix(index, "cbuf");
+        return AppendSuffix(index, "cbuf");
     }
 
     std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
@@ -2409,7 +2491,7 @@ private:
     }
 
     std::string GetConstBufferBlock(u32 index) const {
-        return GetDeclarationWithSuffix(index, "cbuf_block");
+        return AppendSuffix(index, "cbuf_block");
     }
 
     std::string GetLocalMemory() const {
@@ -2434,14 +2516,14 @@ private:
     }
 
     std::string GetSampler(const Sampler& sampler) const {
-        return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
+        return AppendSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
     }
 
     std::string GetImage(const Image& image) const {
-        return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image");
+        return AppendSuffix(static_cast<u32>(image.GetIndex()), "image");
     }
 
-    std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const {
+    std::string AppendSuffix(u32 index, std::string_view name) const {
         if (suffix.empty()) {
             return fmt::format("{}{}", name, index);
         } else {
@@ -2477,6 +2559,7 @@ private:
     const std::string_view identifier;
     const std::string_view suffix;
     const Header header;
+    std::unordered_map<u8, VaryingTFB> transform_feedback;
 
     ShaderWriter code;
 
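
A rough example of what the reworked DeclareOutputAttribute emits for a captured varying, with made-up buffer/offset/stride values; the declaration below is printed by hand just to show the shape of the generated GLSL:

    #include <cstdio>

    int main() {
        // Hypothetical varying: location 0, first three components, captured to
        // transform feedback buffer 0 at offset 16 with a 32-byte stride.
        const int location = 0;
        const int element = 0;
        const int buffer = 0, offset = 16, stride = 32;
        std::printf("layout (location = %d, component = %d, xfb_buffer = %d, xfb_offset = %d, "
                    "xfb_stride = %d) out vec3 out_attr%d_xyz;\n",
                    location, element, buffer, offset, stride, location);
        return 0;
    }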
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 2d3838a7a..f424e3000 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -53,6 +53,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
     {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false},                   // R8UI
     {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false},                          // RGBA16F
     {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false},                       // RGBA16U
+    {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false},                          // RGBA16S
     {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false},             // RGBA16UI
     {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false},  // R11FG11FB10F
     {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false},               // RGBA32UI
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 12333e8c9..fca5e3ec0 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -5,8 +5,11 @@
 #include <algorithm>
 #include <cstddef>
 #include <cstdlib>
+#include <cstring>
 #include <memory>
+
 #include <glad/glad.h>
+
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "common/microprofile.h"
@@ -25,6 +28,8 @@
 
 namespace OpenGL {
 
+namespace {
+
 // If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
 // to wait on available presentation frames.
 constexpr std::size_t SWAP_CHAIN_SIZE = 3;
@@ -41,124 +46,6 @@ struct Frame {
     bool is_srgb{}; /// Framebuffer is sRGB or RGB
 };
 
-/**
- * For smooth Vsync rendering, we want to always present the latest frame that the core generates,
- * but also make sure that rendering happens at the pace that the frontend dictates. This is a
- * helper class that the renderer uses to sync frames between the render thread and the presentation
- * thread
- */
-class FrameMailbox {
-public:
-    std::mutex swap_chain_lock;
-    std::condition_variable present_cv;
-    std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
-    std::queue<Frame*> free_queue;
-    std::deque<Frame*> present_queue;
-    Frame* previous_frame{};
-
-    FrameMailbox() {
-        for (auto& frame : swap_chain) {
-            free_queue.push(&frame);
-        }
-    }
-
-    ~FrameMailbox() {
-        // lock the mutex and clear out the present and free_queues and notify any people who are
-        // blocked to prevent deadlock on shutdown
-        std::scoped_lock lock{swap_chain_lock};
-        std::queue<Frame*>().swap(free_queue);
-        present_queue.clear();
-        present_cv.notify_all();
-    }
-
-    void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
-        frame->present.Release();
-        frame->present.Create();
-        GLint previous_draw_fbo{};
-        glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
-        glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
-        glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
-                                  frame->color.handle);
-        if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
-            LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
-        }
-        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
-        frame->color_reloaded = false;
-    }
-
-    void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
-        // Recreate the color texture attachment
-        frame->color.Release();
-        frame->color.Create();
-        const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8;
-        glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height);
-
-        // Recreate the FBO for the render target
-        frame->render.Release();
-        frame->render.Create();
-        glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle);
-        glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
-                                  frame->color.handle);
-        if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
-            LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
-        }
-
-        frame->width = width;
-        frame->height = height;
-        frame->color_reloaded = true;
-    }
-
-    Frame* GetRenderFrame() {
-        std::unique_lock lock{swap_chain_lock};
-
-        // If theres no free frames, we will reuse the oldest render frame
-        if (free_queue.empty()) {
-            auto frame = present_queue.back();
-            present_queue.pop_back();
-            return frame;
-        }
-
-        Frame* frame = free_queue.front();
-        free_queue.pop();
-        return frame;
-    }
-
-    void ReleaseRenderFrame(Frame* frame) {
-        std::unique_lock lock{swap_chain_lock};
-        present_queue.push_front(frame);
-        present_cv.notify_one();
-    }
-
-    Frame* TryGetPresentFrame(int timeout_ms) {
-        std::unique_lock lock{swap_chain_lock};
-        // wait for new entries in the present_queue
-        present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
-                            [&] { return !present_queue.empty(); });
-        if (present_queue.empty()) {
-            // timed out waiting for a frame to draw so return the previous frame
-            return previous_frame;
-        }
-
-        // free the previous frame and add it back to the free queue
-        if (previous_frame) {
-            free_queue.push(previous_frame);
-        }
-
-        // the newest entries are pushed to the front of the queue
-        Frame* frame = present_queue.front();
-        present_queue.pop_front();
-        // remove all old entries from the present queue and move them back to the free_queue
-        for (auto f : present_queue) {
-            free_queue.push(f);
-        }
-        present_queue.clear();
-        previous_frame = frame;
-        return frame;
-    }
-};
-
-namespace {
-
 constexpr char VERTEX_SHADER[] = R"(
 #version 430 core
 
@@ -211,6 +98,24 @@ struct ScreenRectVertex {
     std::array<GLfloat, 2> tex_coord;
 };
 
+/// Returns true if any debug tool is attached
+bool HasDebugTool() {
+    const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
+    if (nsight) {
+        return true;
+    }
+
+    GLint num_extensions;
+    glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions);
+    for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) {
+        const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index));
+        if (!std::strcmp(name, "GL_EXT_debug_tool")) {
+            return true;
+        }
+    }
+    return false;
+}
+
 /**
  * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left
  * corner and (width, height) on the lower-bottom.
@@ -294,6 +199,153 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
 
 } // Anonymous namespace
 
+/**
+ * For smooth Vsync rendering, we want to always present the latest frame that the core generates,
+ * but also make sure that rendering happens at the pace that the frontend dictates. This is a
+ * helper class that the renderer uses to sync frames between the render thread and the presentation
+ * thread
+ */
+class FrameMailbox {
+public:
+    std::mutex swap_chain_lock;
+    std::condition_variable present_cv;
+    std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
+    std::queue<Frame*> free_queue;
+    std::deque<Frame*> present_queue;
+    Frame* previous_frame{};
+
+    FrameMailbox() : has_debug_tool{HasDebugTool()} {
+        for (auto& frame : swap_chain) {
+            free_queue.push(&frame);
+        }
+    }
+
+    ~FrameMailbox() {
+        // lock the mutex and clear out the present and free_queues and notify any people who are
+        // blocked to prevent deadlock on shutdown
+        std::scoped_lock lock{swap_chain_lock};
+        std::queue<Frame*>().swap(free_queue);
+        present_queue.clear();
+        present_cv.notify_all();
+    }
+
+    void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
+        frame->present.Release();
+        frame->present.Create();
+        GLint previous_draw_fbo{};
+        glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
+        glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
+        glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
+                                  frame->color.handle);
+        if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
+            LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
+        }
+        glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
+        frame->color_reloaded = false;
+    }
+
+    void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
+        // Recreate the color texture attachment
+        frame->color.Release();
+        frame->color.Create();
+        const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8;
+        glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height);
+
+        // Recreate the FBO for the render target
+        frame->render.Release();
+        frame->render.Create();
+        glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle);
+        glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
+                                  frame->color.handle);
+        if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
+            LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
+        }
+
+        frame->width = width;
+        frame->height = height;
+        frame->color_reloaded = true;
+    }
+
+    Frame* GetRenderFrame() {
+        std::unique_lock lock{swap_chain_lock};
+
+        // If theres no free frames, we will reuse the oldest render frame
+        if (free_queue.empty()) {
+            auto frame = present_queue.back();
+            present_queue.pop_back();
+            return frame;
+        }
+
+        Frame* frame = free_queue.front();
+        free_queue.pop();
+        return frame;
+    }
+
+    void ReleaseRenderFrame(Frame* frame) {
+        std::unique_lock lock{swap_chain_lock};
+        present_queue.push_front(frame);
+        present_cv.notify_one();
+
+        DebugNotifyNextFrame();
+    }
+
+    Frame* TryGetPresentFrame(int timeout_ms) {
+        DebugWaitForNextFrame();
+
+        std::unique_lock lock{swap_chain_lock};
+        // wait for new entries in the present_queue
+        present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
+                            [&] { return !present_queue.empty(); });
+        if (present_queue.empty()) {
+            // timed out waiting for a frame to draw so return the previous frame
+            return previous_frame;
+        }
+
+        // free the previous frame and add it back to the free queue
+        if (previous_frame) {
+            free_queue.push(previous_frame);
+        }
+
+        // the newest entries are pushed to the front of the queue
+        Frame* frame = present_queue.front();
+        present_queue.pop_front();
+        // remove all old entries from the present queue and move them back to the free_queue
+        for (auto f : present_queue) {
+            free_queue.push(f);
+        }
+        present_queue.clear();
+        previous_frame = frame;
+        return frame;
+    }
+
+private:
+    std::mutex debug_synch_mutex;
+    std::condition_variable debug_synch_condition;
+    std::atomic_int frame_for_debug{};
+    const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step
+
+    /// Signal that a new frame is available (called from GPU thread)
+    void DebugNotifyNextFrame() {
+        if (!has_debug_tool) {
+            return;
+        }
+        frame_for_debug++;
+        std::lock_guard lock{debug_synch_mutex};
+        debug_synch_condition.notify_one();
+    }
+
+    /// Wait for a new frame to be available (called from presentation thread)
+    void DebugWaitForNextFrame() {
+        if (!has_debug_tool) {
+            return;
+        }
+        const int last_frame = frame_for_debug;
+        std::unique_lock lock{debug_synch_mutex};
+        debug_synch_condition.wait(lock,
+                                   [this, last_frame] { return frame_for_debug > last_frame; });
+    }
+};
+
 RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
     : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system},
       frame_mailbox{std::make_unique<FrameMailbox>()} {}
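
The lock-step path added for GPU debuggers boils down to a counter guarded by a condition variable: the render thread bumps it when a frame is queued and the presentation thread refuses to continue until it advances. A simplified, self-contained sketch of that pattern (not the emulator's actual threading code):

    #include <atomic>
    #include <condition_variable>
    #include <cstdio>
    #include <mutex>
    #include <thread>

    std::mutex debug_mutex;
    std::condition_variable debug_condition;
    std::atomic_int frame_counter{0};

    // Render-thread side: a new frame is ready.
    void NotifyNextFrame() {
        frame_counter++;
        std::lock_guard lock{debug_mutex};
        debug_condition.notify_one();
    }

    // Presentation-thread side: block until at least `expected` frames were queued.
    void WaitForFrame(int expected) {
        std::unique_lock lock{debug_mutex};
        debug_condition.wait(lock, [expected] { return frame_counter >= expected; });
    }

    int main() {
        std::thread presenter([] {
            for (int frame = 1; frame <= 3; ++frame) {
                WaitForFrame(frame);
                std::printf("presenting frame %d\n", frame);
            }
        });
        for (int frame = 1; frame <= 3; ++frame) {
            NotifyNextFrame();
        }
        presenter.join();
        return 0;
    }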
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index df3ac707c..0e2e5e6c7 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -125,6 +125,7 @@ struct FormatTuple {
     {vk::Format::eR8Uint, Attachable | Storage},                // R8UI
     {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage},    // RGBA16F
     {vk::Format::eR16G16B16A16Unorm, Attachable | Storage},     // RGBA16U
+    {vk::Format::eR16G16B16A16Snorm, Attachable | Storage},     // RGBA16S
     {vk::Format::eR16G16B16A16Uint, Attachable | Storage},      // RGBA16UI
     {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F
     {vk::Format::eR32G32B32A32Uint, Attachable | Storage},      // RGBA32UI
@@ -331,6 +332,8 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
         return vk::Format::eR16G16B16Unorm;
     case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
         return vk::Format::eR16G16B16A16Unorm;
+    case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+        return vk::Format::eA2B10G10R10UnormPack32;
     default:
         break;
     }
@@ -364,6 +367,10 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
         return vk::Format::eR8G8B8A8Uint;
     case Maxwell::VertexAttribute::Size::Size_32:
         return vk::Format::eR32Uint;
+    case Maxwell::VertexAttribute::Size::Size_32_32:
+        return vk::Format::eR32G32Uint;
+    case Maxwell::VertexAttribute::Size::Size_32_32_32:
+        return vk::Format::eR32G32B32Uint;
     case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
         return vk::Format::eR32G32B32A32Uint;
     default:
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 886bde3b9..28d2fbc4f 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -107,8 +107,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
     features.occlusionQueryPrecise = true;
     features.fragmentStoresAndAtomics = true;
     features.shaderImageGatherExtended = true;
-    features.shaderStorageImageReadWithoutFormat =
-        is_shader_storage_img_read_without_format_supported;
+    features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported;
     features.shaderStorageImageWriteWithoutFormat = true;
     features.textureCompressionASTC_LDR = is_optimal_astc_supported;
 
@@ -148,6 +147,15 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
         LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
     }
 
+    vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback;
+    if (ext_transform_feedback) {
+        transform_feedback.transformFeedback = true;
+        transform_feedback.geometryStreams = true;
+        SetNext(next, transform_feedback);
+    } else {
+        LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks");
+    }
+
     if (!ext_depth_range_unrestricted) {
         LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
     }
@@ -385,7 +393,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
         }
     };
 
-    extensions.reserve(14);
+    extensions.reserve(15);
     extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
     extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
     extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
@@ -397,18 +405,22 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
 
     [[maybe_unused]] const bool nsight =
         std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
-    bool khr_shader_float16_int8{};
-    bool ext_subgroup_size_control{};
+    bool has_khr_shader_float16_int8{};
+    bool has_ext_subgroup_size_control{};
+    bool has_ext_transform_feedback{};
     for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
         Test(extension, khr_uniform_buffer_standard_layout,
              VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
-        Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
+        Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
+             false);
         Test(extension, ext_depth_range_unrestricted,
              VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
         Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
         Test(extension, ext_shader_viewport_index_layer,
              VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
-        Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
+        Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
+             false);
+        Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME,
              false);
         if (Settings::values.renderer_debug) {
             Test(extension, nv_device_diagnostic_checkpoints,
@@ -416,13 +428,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
         }
     }
 
-    if (khr_shader_float16_int8) {
+    if (has_khr_shader_float16_int8) {
         is_float16_supported =
             GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
         extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
     }
 
-    if (ext_subgroup_size_control) {
+    if (has_ext_subgroup_size_control) {
         const auto features =
             GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
         const auto properties =
@@ -439,6 +451,20 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
         is_warp_potentially_bigger = true;
     }
 
+    if (has_ext_transform_feedback) {
+        const auto features =
+            GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi);
+        const auto properties =
+            GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi);
+
+        if (features.transformFeedback && features.geometryStreams &&
+            properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers &&
+            properties.transformFeedbackQueries && properties.transformFeedbackDraw) {
+            extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
+            ext_transform_feedback = true;
+        }
+    }
+
     return extensions;
 }
 
@@ -467,8 +493,7 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
 
 void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
     const auto supported_features{physical.getFeatures(dldi)};
-    is_shader_storage_img_read_without_format_supported =
-        supported_features.shaderStorageImageReadWithoutFormat;
+    is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
     is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
 }
 
@@ -510,6 +535,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti
         vk::Format::eR32G32Sfloat,
         vk::Format::eR32G32Uint,
         vk::Format::eR16G16B16A16Uint,
+        vk::Format::eR16G16B16A16Snorm,
         vk::Format::eR16G16B16A16Unorm,
         vk::Format::eR16G16Unorm,
         vk::Format::eR16G16Snorm,
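
What SetNext(next, transform_feedback) accomplishes is ordinary Vulkan pNext chaining: the extension's feature struct is linked into the chain handed to device creation so the driver enables transformFeedback and geometryStreams. A hedged sketch using the plain C structs (the emulator itself goes through the vulkan.hpp wrappers):

    #include <vulkan/vulkan.h>

    // Links the VK_EXT_transform_feedback feature struct into an existing pNext
    // chain and returns it; `chain_head` may be nullptr or point at other feature
    // structs already queued for vkCreateDevice.
    VkPhysicalDeviceTransformFeedbackFeaturesEXT MakeTransformFeedbackFeatures(void* chain_head) {
        VkPhysicalDeviceTransformFeedbackFeaturesEXT features{};
        features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
        features.pNext = chain_head;
        features.transformFeedback = VK_TRUE;
        features.geometryStreams = VK_TRUE;
        return features;
    }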
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 2c27ad730..6e656517f 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -122,11 +122,6 @@ public:
122 return properties.limits.maxPushConstantsSize; 122 return properties.limits.maxPushConstantsSize;
123 } 123 }
124 124
125 /// Returns true if Shader storage Image Read Without Format supported.
126 bool IsShaderStorageImageReadWithoutFormatSupported() const {
127 return is_shader_storage_img_read_without_format_supported;
128 }
129
130 /// Returns true if ASTC is natively supported. 125 /// Returns true if ASTC is natively supported.
131 bool IsOptimalAstcSupported() const { 126 bool IsOptimalAstcSupported() const {
132 return is_optimal_astc_supported; 127 return is_optimal_astc_supported;
@@ -147,6 +142,11 @@ public:
147 return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; 142 return (guest_warp_stages & stage) != vk::ShaderStageFlags{};
148 } 143 }
149 144
145 /// Returns true if formatless image load is supported.
146 bool IsFormatlessImageLoadSupported() const {
147 return is_formatless_image_load_supported;
148 }
149
150 /// Returns true if the device supports VK_EXT_scalar_block_layout. 150 /// Returns true if the device supports VK_EXT_scalar_block_layout.
151 bool IsKhrUniformBufferStandardLayoutSupported() const { 151 bool IsKhrUniformBufferStandardLayoutSupported() const {
152 return khr_uniform_buffer_standard_layout; 152 return khr_uniform_buffer_standard_layout;
@@ -167,6 +167,11 @@ public:
167 return ext_shader_viewport_index_layer; 167 return ext_shader_viewport_index_layer;
168 } 168 }
169 169
170 /// Returns true if the device supports VK_EXT_transform_feedback.
171 bool IsExtTransformFeedbackSupported() const {
172 return ext_transform_feedback;
173 }
174
170 /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. 175 /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints.
171 bool IsNvDeviceDiagnosticCheckpoints() const { 176 bool IsNvDeviceDiagnosticCheckpoints() const {
172 return nv_device_diagnostic_checkpoints; 177 return nv_device_diagnostic_checkpoints;
@@ -214,26 +219,26 @@ private:
214 static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( 219 static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
215 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); 220 const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
216 221
217 const vk::PhysicalDevice physical; ///< Physical device. 222 const vk::PhysicalDevice physical; ///< Physical device.
218 vk::DispatchLoaderDynamic dld; ///< Device function pointers. 223 vk::DispatchLoaderDynamic dld; ///< Device function pointers.
219 vk::PhysicalDeviceProperties properties; ///< Device properties. 224 vk::PhysicalDeviceProperties properties; ///< Device properties.
220 UniqueDevice logical; ///< Logical device. 225 UniqueDevice logical; ///< Logical device.
221 vk::Queue graphics_queue; ///< Main graphics queue. 226 vk::Queue graphics_queue; ///< Main graphics queue.
222 vk::Queue present_queue; ///< Main present queue. 227 vk::Queue present_queue; ///< Main present queue.
223 u32 graphics_family{}; ///< Main graphics queue family index. 228 u32 graphics_family{}; ///< Main graphics queue family index.
224 u32 present_family{}; ///< Main present queue family index. 229 u32 present_family{}; ///< Main present queue family index.
225 vk::DriverIdKHR driver_id{}; ///< Driver ID. 230 vk::DriverIdKHR driver_id{}; ///< Driver ID.
226 vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. 231 vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
227 bool is_optimal_astc_supported{}; ///< Support for native ASTC. 232 bool is_optimal_astc_supported{}; ///< Support for native ASTC.
228 bool is_float16_supported{}; ///< Support for float16 arithmetics. 233 bool is_float16_supported{}; ///< Support for float16 arithmetics.
229 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. 234 bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
235 bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
230 bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. 236 bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
231 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. 237 bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
232 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. 238 bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
233 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. 239 bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
240 bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
234 bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. 241 bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints.
235 bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage
236 ///< image read without format
237 242
238 // Telemetry parameters 243 // Telemetry parameters
239 std::string vendor_name; ///< Device's driver name. 244 std::string vendor_name; ///< Device's driver name.
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index ebf85f311..91e7b7791 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -179,10 +179,11 @@ Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
179VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, 179VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
180 const VKDevice& device, VKScheduler& scheduler, 180 const VKDevice& device, VKScheduler& scheduler,
181 VKDescriptorPool& descriptor_pool, 181 VKDescriptorPool& descriptor_pool,
182 VKUpdateDescriptorQueue& update_descriptor_queue) 182 VKUpdateDescriptorQueue& update_descriptor_queue,
183 VKRenderPassCache& renderpass_cache)
183 : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, 184 : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
184 descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, 185 descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
185 renderpass_cache(device) {} 186 renderpass_cache{renderpass_cache} {}
186 187
187VKPipelineCache::~VKPipelineCache() = default; 188VKPipelineCache::~VKPipelineCache() = default;
188 189
@@ -273,9 +274,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
273 specialization.workgroup_size = key.workgroup_size; 274 specialization.workgroup_size = key.workgroup_size;
274 specialization.shared_memory_size = key.shared_memory_size; 275 specialization.shared_memory_size = key.shared_memory_size;
275 276
276 const SPIRVShader spirv_shader{ 277 const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
277 Decompile(device, shader->GetIR(), ShaderType::Compute, specialization), 278 shader->GetRegistry(), specialization),
278 shader->GetEntries()}; 279 shader->GetEntries()};
279 entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, 280 entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
280 update_descriptor_queue, spirv_shader); 281 update_descriptor_queue, spirv_shader);
281 return *entry; 282 return *entry;
@@ -324,8 +325,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
324 const auto& gpu = system.GPU().Maxwell3D(); 325 const auto& gpu = system.GPU().Maxwell3D();
325 326
326 Specialization specialization; 327 Specialization specialization;
327 specialization.primitive_topology = fixed_state.input_assembly.topology; 328 if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) {
328 if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
329 ASSERT(fixed_state.input_assembly.point_size != 0.0f); 329 ASSERT(fixed_state.input_assembly.point_size != 0.0f);
330 specialization.point_size = fixed_state.input_assembly.point_size; 330 specialization.point_size = fixed_state.input_assembly.point_size;
331 } 331 }
@@ -333,9 +333,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
333 specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; 333 specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
334 } 334 }
335 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; 335 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
336 specialization.tessellation.primitive = fixed_state.tessellation.primitive;
337 specialization.tessellation.spacing = fixed_state.tessellation.spacing;
338 specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
339 336
340 SPIRVProgram program; 337 SPIRVProgram program;
341 std::vector<vk::DescriptorSetLayoutBinding> bindings; 338 std::vector<vk::DescriptorSetLayoutBinding> bindings;
@@ -356,8 +353,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
356 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 353 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
357 const auto program_type = GetShaderType(program_enum); 354 const auto program_type = GetShaderType(program_enum);
358 const auto& entries = shader->GetEntries(); 355 const auto& entries = shader->GetEntries();
359 program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization), 356 program[stage] = {
360 entries}; 357 Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
358 entries};
361 359
362 if (program_enum == Maxwell::ShaderProgram::VertexA) { 360 if (program_enum == Maxwell::ShaderProgram::VertexA) {
363 // VertexB was combined with VertexA, so we skip the VertexB iteration 361 // VertexB was combined with VertexA, so we skip the VertexB iteration
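
With this change Decompile also receives the shader's Registry, which now carries the per-stage graphics state (primitive topology, tessellation mode) that used to be duplicated in Specialization. A sketch of the resulting call-site shape, using only names visible in the hunks above:

// Illustrative only; mirrors the new signature rather than adding API.
const SPIRVShader spirv_shader{
    Decompile(device, shader->GetIR(), GetShaderType(program_enum),
              shader->GetRegistry(), specialization),
    shader->GetEntries()};
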
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index e292526bb..c4c112290 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -132,6 +132,10 @@ public:
132 return shader_ir; 132 return shader_ir;
133 } 133 }
134 134
135 const VideoCommon::Shader::Registry& GetRegistry() const {
136 return registry;
137 }
138
135 const VideoCommon::Shader::ShaderIR& GetIR() const { 139 const VideoCommon::Shader::ShaderIR& GetIR() const {
136 return shader_ir; 140 return shader_ir;
137 } 141 }
@@ -157,7 +161,8 @@ public:
157 explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, 161 explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
158 const VKDevice& device, VKScheduler& scheduler, 162 const VKDevice& device, VKScheduler& scheduler,
159 VKDescriptorPool& descriptor_pool, 163 VKDescriptorPool& descriptor_pool,
160 VKUpdateDescriptorQueue& update_descriptor_queue); 164 VKUpdateDescriptorQueue& update_descriptor_queue,
165 VKRenderPassCache& renderpass_cache);
161 ~VKPipelineCache(); 166 ~VKPipelineCache();
162 167
163 std::array<Shader, Maxwell::MaxShaderProgram> GetShaders(); 168 std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
@@ -180,8 +185,7 @@ private:
180 VKScheduler& scheduler; 185 VKScheduler& scheduler;
181 VKDescriptorPool& descriptor_pool; 186 VKDescriptorPool& descriptor_pool;
182 VKUpdateDescriptorQueue& update_descriptor_queue; 187 VKUpdateDescriptorQueue& update_descriptor_queue;
183 188 VKRenderPassCache& renderpass_cache;
184 VKRenderPassCache renderpass_cache;
185 189
186 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 190 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
187 191
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 2bcb17b56..755aad643 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -287,12 +287,13 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
287 screen_info{screen_info}, device{device}, resource_manager{resource_manager}, 287 screen_info{screen_info}, device{device}, resource_manager{resource_manager},
288 memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler}, 288 memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler},
289 staging_pool(device, memory_manager, scheduler), descriptor_pool(device), 289 staging_pool(device, memory_manager, scheduler), descriptor_pool(device),
290 update_descriptor_queue(device, scheduler), 290 update_descriptor_queue(device, scheduler), renderpass_cache(device),
291 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 291 quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
292 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), 292 uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
293 texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, 293 texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
294 staging_pool), 294 staging_pool),
295 pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), 295 pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
296 renderpass_cache),
296 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), 297 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
297 sampler_cache(device), query_cache(system, *this, device, scheduler) { 298 sampler_cache(device), query_cache(system, *this, device, scheduler) {
298 scheduler.SetQueryCache(query_cache); 299 scheduler.SetQueryCache(query_cache);
@@ -347,6 +348,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
347 [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); 348 [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
348 } 349 }
349 350
351 BeginTransformFeedback();
352
350 const auto pipeline_layout = pipeline.GetLayout(); 353 const auto pipeline_layout = pipeline.GetLayout();
351 const auto descriptor_set = pipeline.CommitDescriptorSet(); 354 const auto descriptor_set = pipeline.CommitDescriptorSet();
352 scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { 355 scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) {
@@ -356,18 +359,23 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
356 } 359 }
357 draw_params.Draw(cmdbuf, dld); 360 draw_params.Draw(cmdbuf, dld);
358 }); 361 });
362
363 EndTransformFeedback();
359} 364}
360 365
361void RasterizerVulkan::Clear() { 366void RasterizerVulkan::Clear() {
362 MICROPROFILE_SCOPE(Vulkan_Clearing); 367 MICROPROFILE_SCOPE(Vulkan_Clearing);
363 368
364 query_cache.UpdateCounters();
365
366 const auto& gpu = system.GPU().Maxwell3D(); 369 const auto& gpu = system.GPU().Maxwell3D();
367 if (!system.GPU().Maxwell3D().ShouldExecute()) { 370 if (!system.GPU().Maxwell3D().ShouldExecute()) {
368 return; 371 return;
369 } 372 }
370 373
374 sampled_views.clear();
375 image_views.clear();
376
377 query_cache.UpdateCounters();
378
371 const auto& regs = gpu.regs; 379 const auto& regs = gpu.regs;
372 const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || 380 const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
373 regs.clear_buffers.A; 381 regs.clear_buffers.A;
@@ -376,52 +384,54 @@ void RasterizerVulkan::Clear() {
376 if (!use_color && !use_depth && !use_stencil) { 384 if (!use_color && !use_depth && !use_stencil) {
377 return; 385 return;
378 } 386 }
379 // Clearing images requires to be out of a renderpass
380 scheduler.RequestOutsideRenderPassOperationContext();
381 387
382 // TODO(Rodrigo): Implement clears rendering a quad or using beginning a renderpass. 388 [[maybe_unused]] const auto texceptions = UpdateAttachments();
389 DEBUG_ASSERT(texceptions.none());
390 SetupImageTransitions(0, color_attachments, zeta_attachment);
383 391
384 if (use_color) { 392 const vk::RenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0));
385 View color_view; 393 const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
386 { 394 scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr});
387 MICROPROFILE_SCOPE(Vulkan_RenderTargets); 395
388 color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false); 396 const auto& scissor = regs.scissor_test[0];
389 } 397 const vk::Offset2D scissor_offset(scissor.min_x, scissor.min_y);
398 vk::Extent2D scissor_extent{scissor.max_x - scissor.min_x, scissor.max_y - scissor.min_y};
399 scissor_extent.width = std::min(scissor_extent.width, render_area.width);
400 scissor_extent.height = std::min(scissor_extent.height, render_area.height);
390 401
391 color_view->Transition(vk::ImageLayout::eTransferDstOptimal, 402 const u32 layer = regs.clear_buffers.layer;
392 vk::PipelineStageFlagBits::eTransfer, 403 const vk::ClearRect clear_rect({scissor_offset, scissor_extent}, layer, 1);
393 vk::AccessFlagBits::eTransferWrite);
394 404
405 if (use_color) {
395 const std::array clear_color = {regs.clear_color[0], regs.clear_color[1], 406 const std::array clear_color = {regs.clear_color[0], regs.clear_color[1],
396 regs.clear_color[2], regs.clear_color[3]}; 407 regs.clear_color[2], regs.clear_color[3]};
397 const vk::ClearColorValue clear(clear_color); 408 const vk::ClearValue clear_value{clear_color};
398 scheduler.Record([image = color_view->GetImage(), 409 const u32 color_attachment = regs.clear_buffers.RT;
399 subresource = color_view->GetImageSubresourceRange(), 410 scheduler.Record([color_attachment, clear_value, clear_rect](auto cmdbuf, auto& dld) {
400 clear](auto cmdbuf, auto& dld) { 411 const vk::ClearAttachment attachment(vk::ImageAspectFlagBits::eColor, color_attachment,
401 cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource, 412 clear_value);
402 dld); 413 cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld);
403 }); 414 });
404 } 415 }
405 if (use_depth || use_stencil) {
406 View zeta_surface;
407 {
408 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
409 zeta_surface = texture_cache.GetDepthBufferSurface(false);
410 }
411 416
412 zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal, 417 if (!use_depth && !use_stencil) {
413 vk::PipelineStageFlagBits::eTransfer, 418 return;
414 vk::AccessFlagBits::eTransferWrite); 419 }
415 420 vk::ImageAspectFlags aspect_flags;
416 const vk::ClearDepthStencilValue clear(regs.clear_depth, 421 if (use_depth) {
417 static_cast<u32>(regs.clear_stencil)); 422 aspect_flags |= vk::ImageAspectFlagBits::eDepth;
418 scheduler.Record([image = zeta_surface->GetImage(),
419 subresource = zeta_surface->GetImageSubresourceRange(),
420 clear](auto cmdbuf, auto& dld) {
421 cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear,
422 subresource, dld);
423 });
424 } 423 }
424 if (use_stencil) {
425 aspect_flags |= vk::ImageAspectFlagBits::eStencil;
426 }
427
428 scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
429 clear_rect, aspect_flags](auto cmdbuf, auto& dld) {
430 const vk::ClearDepthStencilValue clear_zeta(clear_depth, clear_stencil);
431 const vk::ClearValue clear_value{clear_zeta};
432 const vk::ClearAttachment attachment(aspect_flags, 0, clear_value);
433 cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld);
434 });
425} 435}
426 436
427void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { 437void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
@@ -738,6 +748,44 @@ void RasterizerVulkan::UpdateDynamicStates() {
738 UpdateStencilFaces(regs); 748 UpdateStencilFaces(regs);
739} 749}
740 750
751void RasterizerVulkan::BeginTransformFeedback() {
752 const auto& regs = system.GPU().Maxwell3D().regs;
753 if (regs.tfb_enabled == 0) {
754 return;
755 }
756
757 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
758 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
759 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
760
761 UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable);
762 UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
763 UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
764
765 const auto& binding = regs.tfb_bindings[0];
766 UNIMPLEMENTED_IF(binding.buffer_enable == 0);
767 UNIMPLEMENTED_IF(binding.buffer_offset != 0);
768
769 const GPUVAddr gpu_addr = binding.Address();
770 const std::size_t size = binding.buffer_size;
771 const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
772
773 scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) {
774 cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld);
775 cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld);
776 });
777}
778
779void RasterizerVulkan::EndTransformFeedback() {
780 const auto& regs = system.GPU().Maxwell3D().regs;
781 if (regs.tfb_enabled == 0) {
782 return;
783 }
784
785 scheduler.Record(
786 [](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); });
787}
788
741void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, 789void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
742 BufferBindings& buffer_bindings) { 790 BufferBindings& buffer_bindings) {
743 const auto& regs = system.GPU().Maxwell3D().regs; 791 const auto& regs = system.GPU().Maxwell3D().regs;
@@ -1109,7 +1157,7 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
1109 // This implementation assumes that all attributes are used in the shader. 1157 // This implementation assumes that all attributes are used in the shader.
1110 const GPUVAddr start{regs.vertex_array[index].StartAddress()}; 1158 const GPUVAddr start{regs.vertex_array[index].StartAddress()};
1111 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; 1159 const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()};
1112 DEBUG_ASSERT(end > start); 1160 DEBUG_ASSERT(end >= start);
1113 1161
1114 size += (end - start + 1) * regs.vertex_array[index].enable; 1162 size += (end - start + 1) * regs.vertex_array[index].enable;
1115 } 1163 }
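
BeginTransformFeedback()/EndTransformFeedback() above record the EXT_transform_feedback commands through the scheduler and the vulkan.hpp dispatch loader. Stripped of that plumbing, the sequence wrapped around a draw reduces to roughly the following raw C API sketch (an assumption for readability: the extension was enabled at device creation, the buffer was created with the transform feedback usage bit, the command buffer is inside a render pass, and in practice the extension entry points are fetched through the dispatch loader):

#include <vulkan/vulkan.h>

// Bind one feedback buffer, capture the draw, then end the capture.
void RecordXfbDraw(VkCommandBuffer cmdbuf, VkBuffer buffer, VkDeviceSize offset,
                   VkDeviceSize size, uint32_t vertex_count) {
    vkCmdBindTransformFeedbackBuffersEXT(cmdbuf, 0, 1, &buffer, &offset, &size);
    vkCmdBeginTransformFeedbackEXT(cmdbuf, 0, 0, nullptr, nullptr); // no counter buffers
    vkCmdDraw(cmdbuf, vertex_count, 1, 0, 0);
    vkCmdEndTransformFeedbackEXT(cmdbuf, 0, 0, nullptr, nullptr);
}
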
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 96ea05f0a..3185868e9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -169,6 +169,10 @@ private:
169 169
170 void UpdateDynamicStates(); 170 void UpdateDynamicStates();
171 171
172 void BeginTransformFeedback();
173
174 void EndTransformFeedback();
175
172 bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); 176 bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
173 177
174 void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, 178 void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
@@ -249,6 +253,7 @@ private:
249 VKStagingBufferPool staging_pool; 253 VKStagingBufferPool staging_pool;
250 VKDescriptorPool descriptor_pool; 254 VKDescriptorPool descriptor_pool;
251 VKUpdateDescriptorQueue update_descriptor_queue; 255 VKUpdateDescriptorQueue update_descriptor_queue;
256 VKRenderPassCache renderpass_cache;
252 QuadArrayPass quad_array_pass; 257 QuadArrayPass quad_array_pass;
253 Uint8Pass uint8_pass; 258 Uint8Pass uint8_pass;
254 259
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index cfcca5af0..51ecb5567 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -5,7 +5,9 @@
5#include <functional> 5#include <functional>
6#include <limits> 6#include <limits>
7#include <map> 7#include <map>
8#include <optional>
8#include <type_traits> 9#include <type_traits>
10#include <unordered_map>
9#include <utility> 11#include <utility>
10 12
11#include <fmt/format.h> 13#include <fmt/format.h>
@@ -24,6 +26,7 @@
24#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 26#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
25#include "video_core/shader/node.h" 27#include "video_core/shader/node.h"
26#include "video_core/shader/shader_ir.h" 28#include "video_core/shader/shader_ir.h"
29#include "video_core/shader/transform_feedback.h"
27 30
28namespace Vulkan { 31namespace Vulkan {
29 32
@@ -93,6 +96,12 @@ struct VertexIndices {
93 std::optional<u32> clip_distances; 96 std::optional<u32> clip_distances;
94}; 97};
95 98
99struct GenericVaryingDescription {
100 Id id = nullptr;
101 u32 first_element = 0;
102 bool is_scalar = false;
103};
104
96spv::Dim GetSamplerDim(const Sampler& sampler) { 105spv::Dim GetSamplerDim(const Sampler& sampler) {
97 ASSERT(!sampler.IsBuffer()); 106 ASSERT(!sampler.IsBuffer());
98 switch (sampler.GetType()) { 107 switch (sampler.GetType()) {
@@ -266,9 +275,13 @@ bool IsPrecise(Operation operand) {
266class SPIRVDecompiler final : public Sirit::Module { 275class SPIRVDecompiler final : public Sirit::Module {
267public: 276public:
268 explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, 277 explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage,
269 const Specialization& specialization) 278 const Registry& registry, const Specialization& specialization)
270 : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()}, 279 : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()},
271 specialization{specialization} { 280 registry{registry}, specialization{specialization} {
281 if (stage != ShaderType::Compute) {
282 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
283 }
284
272 AddCapability(spv::Capability::Shader); 285 AddCapability(spv::Capability::Shader);
273 AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); 286 AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
274 AddCapability(spv::Capability::ImageQuery); 287 AddCapability(spv::Capability::ImageQuery);
@@ -286,6 +299,15 @@ public:
286 AddExtension("SPV_KHR_variable_pointers"); 299 AddExtension("SPV_KHR_variable_pointers");
287 AddExtension("SPV_KHR_shader_draw_parameters"); 300 AddExtension("SPV_KHR_shader_draw_parameters");
288 301
302 if (!transform_feedback.empty()) {
303 if (device.IsExtTransformFeedbackSupported()) {
304 AddCapability(spv::Capability::TransformFeedback);
305 } else {
306 LOG_ERROR(Render_Vulkan, "Shader requires transform feedback but it is not "
307 "supported on this device");
308 }
309 }
310
289 if (ir.UsesLayer() || ir.UsesViewportIndex()) { 311 if (ir.UsesLayer() || ir.UsesViewportIndex()) {
290 if (ir.UsesViewportIndex()) { 312 if (ir.UsesViewportIndex()) {
291 AddCapability(spv::Capability::MultiViewport); 313 AddCapability(spv::Capability::MultiViewport);
@@ -296,7 +318,7 @@ public:
296 } 318 }
297 } 319 }
298 320
299 if (device.IsShaderStorageImageReadWithoutFormatSupported()) { 321 if (device.IsFormatlessImageLoadSupported()) {
300 AddCapability(spv::Capability::StorageImageReadWithoutFormat); 322 AddCapability(spv::Capability::StorageImageReadWithoutFormat);
301 } 323 }
302 324
@@ -318,25 +340,29 @@ public:
318 AddExecutionMode(main, spv::ExecutionMode::OutputVertices, 340 AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
319 header.common2.threads_per_input_primitive); 341 header.common2.threads_per_input_primitive);
320 break; 342 break;
321 case ShaderType::TesselationEval: 343 case ShaderType::TesselationEval: {
344 const auto& info = registry.GetGraphicsInfo();
322 AddCapability(spv::Capability::Tessellation); 345 AddCapability(spv::Capability::Tessellation);
323 AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); 346 AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces);
324 AddExecutionMode(main, GetExecutionMode(specialization.tessellation.primitive)); 347 AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive));
325 AddExecutionMode(main, GetExecutionMode(specialization.tessellation.spacing)); 348 AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing));
326 AddExecutionMode(main, specialization.tessellation.clockwise 349 AddExecutionMode(main, info.tessellation_clockwise
327 ? spv::ExecutionMode::VertexOrderCw 350 ? spv::ExecutionMode::VertexOrderCw
328 : spv::ExecutionMode::VertexOrderCcw); 351 : spv::ExecutionMode::VertexOrderCcw);
329 break; 352 break;
330 case ShaderType::Geometry: 353 }
354 case ShaderType::Geometry: {
355 const auto& info = registry.GetGraphicsInfo();
331 AddCapability(spv::Capability::Geometry); 356 AddCapability(spv::Capability::Geometry);
332 AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); 357 AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces);
333 AddExecutionMode(main, GetExecutionMode(specialization.primitive_topology)); 358 AddExecutionMode(main, GetExecutionMode(info.primitive_topology));
334 AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); 359 AddExecutionMode(main, GetExecutionMode(header.common3.output_topology));
335 AddExecutionMode(main, spv::ExecutionMode::OutputVertices, 360 AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
336 header.common4.max_output_vertices); 361 header.common4.max_output_vertices);
337 // TODO(Rodrigo): Where can we get this info from? 362 // TODO(Rodrigo): Where can we get this info from?
338 AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); 363 AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U);
339 break; 364 break;
365 }
340 case ShaderType::Fragment: 366 case ShaderType::Fragment:
341 AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); 367 AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces);
342 AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); 368 AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
@@ -545,7 +571,8 @@ private:
545 if (stage != ShaderType::Geometry) { 571 if (stage != ShaderType::Geometry) {
546 return; 572 return;
547 } 573 }
548 const u32 num_input = GetNumPrimitiveTopologyVertices(specialization.primitive_topology); 574 const auto& info = registry.GetGraphicsInfo();
575 const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology);
549 DeclareInputVertexArray(num_input); 576 DeclareInputVertexArray(num_input);
550 DeclareOutputVertex(); 577 DeclareOutputVertex();
551 } 578 }
@@ -742,12 +769,34 @@ private:
742 } 769 }
743 770
744 void DeclareOutputAttributes() { 771 void DeclareOutputAttributes() {
772 if (stage == ShaderType::Compute || stage == ShaderType::Fragment) {
773 return;
774 }
775
776 UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex);
745 for (const auto index : ir.GetOutputAttributes()) { 777 for (const auto index : ir.GetOutputAttributes()) {
746 if (!IsGenericAttribute(index)) { 778 if (!IsGenericAttribute(index)) {
747 continue; 779 continue;
748 } 780 }
749 const u32 location = GetGenericAttributeLocation(index); 781 DeclareOutputAttribute(index);
750 Id type = t_float4; 782 }
783 }
784
785 void DeclareOutputAttribute(Attribute::Index index) {
786 static constexpr std::string_view swizzle = "xyzw";
787
788 const u32 location = GetGenericAttributeLocation(index);
789 u8 element = 0;
790 while (element < 4) {
791 const std::size_t remainder = 4 - element;
792
793 std::size_t num_components = remainder;
794 const std::optional tfb = GetTransformFeedbackInfo(index, element);
795 if (tfb) {
796 num_components = tfb->components;
797 }
798
799 Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1);
751 Id varying_default = v_varying_default; 800 Id varying_default = v_varying_default;
752 if (IsOutputAttributeArray()) { 801 if (IsOutputAttributeArray()) {
753 const u32 num = GetNumOutputVertices(); 802 const u32 num = GetNumOutputVertices();
@@ -760,13 +809,45 @@ private:
760 } 809 }
761 type = TypePointer(spv::StorageClass::Output, type); 810 type = TypePointer(spv::StorageClass::Output, type);
762 811
812 std::string name = fmt::format("out_attr{}", location);
813 if (num_components < 4 || element > 0) {
814 name = fmt::format("{}_{}", name, swizzle.substr(element, num_components));
815 }
816
763 const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); 817 const Id id = OpVariable(type, spv::StorageClass::Output, varying_default);
764 Name(AddGlobalVariable(id), fmt::format("out_attr{}", location)); 818 Name(AddGlobalVariable(id), name);
765 output_attributes.emplace(index, id); 819
820 GenericVaryingDescription description;
821 description.id = id;
822 description.first_element = element;
823 description.is_scalar = num_components == 1;
824 for (u32 i = 0; i < num_components; ++i) {
825 const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i);
826 output_attributes.emplace(offset, description);
827 }
766 interfaces.push_back(id); 828 interfaces.push_back(id);
767 829
768 Decorate(id, spv::Decoration::Location, location); 830 Decorate(id, spv::Decoration::Location, location);
831 if (element > 0) {
832 Decorate(id, spv::Decoration::Component, static_cast<u32>(element));
833 }
834 if (tfb && device.IsExtTransformFeedbackSupported()) {
835 Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer));
836 Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride));
837 Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset));
838 }
839
840 element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
841 }
842 }
843
844 std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) {
845 const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
846 const auto it = transform_feedback.find(location);
847 if (it == transform_feedback.end()) {
848 return {};
769 } 849 }
850 return it->second;
770 } 851 }
771 852
772 u32 DeclareConstantBuffers(u32 binding) { 853 u32 DeclareConstantBuffers(u32 binding) {
@@ -898,7 +979,7 @@ private:
898 u32 GetNumInputVertices() const { 979 u32 GetNumInputVertices() const {
899 switch (stage) { 980 switch (stage) {
900 case ShaderType::Geometry: 981 case ShaderType::Geometry:
901 return GetNumPrimitiveTopologyVertices(specialization.primitive_topology); 982 return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology);
902 case ShaderType::TesselationControl: 983 case ShaderType::TesselationControl:
903 case ShaderType::TesselationEval: 984 case ShaderType::TesselationEval:
904 return NumInputPatches; 985 return NumInputPatches;
@@ -1346,8 +1427,14 @@ private:
1346 } 1427 }
1347 default: 1428 default:
1348 if (IsGenericAttribute(attribute)) { 1429 if (IsGenericAttribute(attribute)) {
1349 const Id composite = output_attributes.at(attribute); 1430 const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element);
1350 return {ArrayPass(t_out_float, composite, {element}), Type::Float}; 1431 const GenericVaryingDescription description = output_attributes.at(offset);
1432 const Id composite = description.id;
1433 std::vector<u32> indices;
1434 if (!description.is_scalar) {
1435 indices.push_back(element - description.first_element);
1436 }
1437 return {ArrayPass(t_out_float, composite, indices), Type::Float};
1351 } 1438 }
1352 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", 1439 UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
1353 static_cast<u32>(attribute)); 1440 static_cast<u32>(attribute));
@@ -1793,7 +1880,7 @@ private:
1793 } 1880 }
1794 1881
1795 Expression ImageLoad(Operation operation) { 1882 Expression ImageLoad(Operation operation) {
1796 if (!device.IsShaderStorageImageReadWithoutFormatSupported()) { 1883 if (!device.IsFormatlessImageLoadSupported()) {
1797 return {v_float_zero, Type::Float}; 1884 return {v_float_zero, Type::Float};
1798 } 1885 }
1799 1886
@@ -2258,11 +2345,11 @@ private:
2258 std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { 2345 std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const {
2259 switch (type) { 2346 switch (type) {
2260 case Type::Float: 2347 case Type::Float:
2261 return {nullptr, t_float2, t_float3, t_float4}; 2348 return {t_float, t_float2, t_float3, t_float4};
2262 case Type::Int: 2349 case Type::Int:
2263 return {nullptr, t_int2, t_int3, t_int4}; 2350 return {t_int, t_int2, t_int3, t_int4};
2264 case Type::Uint: 2351 case Type::Uint:
2265 return {nullptr, t_uint2, t_uint3, t_uint4}; 2352 return {t_uint, t_uint2, t_uint3, t_uint4};
2266 default: 2353 default:
2267 UNIMPLEMENTED(); 2354 UNIMPLEMENTED();
2268 return {}; 2355 return {};
@@ -2495,7 +2582,9 @@ private:
2495 const ShaderIR& ir; 2582 const ShaderIR& ir;
2496 const ShaderType stage; 2583 const ShaderType stage;
2497 const Tegra::Shader::Header header; 2584 const Tegra::Shader::Header header;
2585 const Registry& registry;
2498 const Specialization& specialization; 2586 const Specialization& specialization;
2587 std::unordered_map<u8, VaryingTFB> transform_feedback;
2499 2588
2500 const Id t_void = Name(TypeVoid(), "void"); 2589 const Id t_void = Name(TypeVoid(), "void");
2501 2590
@@ -2584,7 +2673,7 @@ private:
2584 Id shared_memory{}; 2673 Id shared_memory{};
2585 std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; 2674 std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
2586 std::map<Attribute::Index, Id> input_attributes; 2675 std::map<Attribute::Index, Id> input_attributes;
2587 std::map<Attribute::Index, Id> output_attributes; 2676 std::unordered_map<u8, GenericVaryingDescription> output_attributes;
2588 std::map<u32, Id> constant_buffers; 2677 std::map<u32, Id> constant_buffers;
2589 std::map<GlobalMemoryBase, Id> global_buffers; 2678 std::map<GlobalMemoryBase, Id> global_buffers;
2590 std::map<u32, TexelBuffer> texel_buffers; 2679 std::map<u32, TexelBuffer> texel_buffers;
@@ -2870,8 +2959,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
2870} 2959}
2871 2960
2872std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, 2961std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
2873 ShaderType stage, const Specialization& specialization) { 2962 ShaderType stage, const VideoCommon::Shader::Registry& registry,
2874 return SPIRVDecompiler(device, ir, stage, specialization).Assemble(); 2963 const Specialization& specialization) {
2964 return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble();
2875} 2965}
2876 2966
2877} // namespace Vulkan 2967} // namespace Vulkan
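
The reworked output_attributes map is keyed per component rather than per attribute, so a varying split by transform feedback (for example out_attr0_xyz plus out_attr0_w) can still be resolved in the output-attribute store path. A small sketch of the packed key; the static_assert assumes Attribute::Index::Attribute_0 == 8, which matches Generic 0 appearing at offset 32 in the VECTORS table of transform_feedback.cpp below:

#include <cstdint>

// Packed key assumed by the new output_attributes map: the raw attribute index
// and the component are folded into one byte, one entry per covered component.
constexpr std::uint8_t PackVaryingKey(std::uint32_t attribute_index, std::uint32_t component) {
    return static_cast<std::uint8_t>(attribute_index * 4 + component);
}

static_assert(PackVaryingKey(8, 2) == 34, "Attribute_0.z, assuming Attribute_0 == 8");
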
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index f5dc14d9e..ffea4709e 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -15,6 +15,7 @@
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "video_core/engines/maxwell_3d.h" 16#include "video_core/engines/maxwell_3d.h"
17#include "video_core/engines/shader_type.h" 17#include "video_core/engines/shader_type.h"
18#include "video_core/shader/registry.h"
18#include "video_core/shader/shader_ir.h" 19#include "video_core/shader/shader_ir.h"
19 20
20namespace Vulkan { 21namespace Vulkan {
@@ -91,17 +92,9 @@ struct Specialization final {
91 u32 shared_memory_size{}; 92 u32 shared_memory_size{};
92 93
93 // Graphics specific 94 // Graphics specific
94 Maxwell::PrimitiveTopology primitive_topology{};
95 std::optional<float> point_size{}; 95 std::optional<float> point_size{};
96 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; 96 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
97 bool ndc_minus_one_to_one{}; 97 bool ndc_minus_one_to_one{};
98
99 // Tessellation specific
100 struct {
101 Maxwell::TessellationPrimitive primitive{};
102 Maxwell::TessellationSpacing spacing{};
103 bool clockwise{};
104 } tessellation;
105}; 98};
106// Old gcc versions don't consider this trivially copyable. 99// Old gcc versions don't consider this trivially copyable.
107// static_assert(std::is_trivially_copyable_v<Specialization>); 100// static_assert(std::is_trivially_copyable_v<Specialization>);
@@ -114,6 +107,8 @@ struct SPIRVShader {
114ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); 107ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
115 108
116std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, 109std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
117 Tegra::Engines::ShaderType stage, const Specialization& specialization); 110 Tegra::Engines::ShaderType stage,
111 const VideoCommon::Shader::Registry& registry,
112 const Specialization& specialization);
118 113
119} // namespace Vulkan 114} // namespace Vulkan
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
index e02bcd097..8e3b46e8e 100644
--- a/src/video_core/shader/decode/bfe.cpp
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -17,33 +17,60 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]}; 17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr); 18 const auto opcode = OpCode::Decode(instr);
19 19
20 UNIMPLEMENTED_IF(instr.bfe.negate_b);
21
22 Node op_a = GetRegister(instr.gpr8); 20 Node op_a = GetRegister(instr.gpr8);
23 op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false); 21 Node op_b = [&] {
24 22 switch (opcode->get().GetId()) {
25 switch (opcode->get().GetId()) { 23 case OpCode::Id::BFE_R:
26 case OpCode::Id::BFE_IMM: { 24 return GetRegister(instr.gpr20);
27 UNIMPLEMENTED_IF_MSG(instr.generates_cc, 25 case OpCode::Id::BFE_C:
28 "Condition codes generation in BFE is not implemented"); 26 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
27 case OpCode::Id::BFE_IMM:
28 return Immediate(instr.alu.GetSignedImm20_20());
29 default:
30 UNREACHABLE();
31 return Immediate(0);
32 }
33 }();
29 34
30 const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue())); 35 UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE are not implemented");
31 const Node outer_shift_imm =
32 Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position));
33 36
34 const Node inner_shift = 37 const bool is_signed = instr.bfe.is_signed;
35 Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm);
36 const Node outer_shift =
37 Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm);
38 38
39 SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc); 39 // Uses the parallel bit-reversal method from
40 SetRegister(bb, instr.gpr0, outer_shift); 40 // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
41 break; 41 // Note for later: a faster method may be possible.
42 } 42 if (instr.bfe.brev) {
43 default: 43 const auto swap = [&](u32 s, u32 mask) {
44 UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName()); 44 Node v1 =
45 SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s));
46 if (mask != 0) {
47 v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1),
48 Immediate(mask));
49 }
50 Node v2 = op_a;
51 if (mask != 0) {
52 v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2),
53 Immediate(mask));
54 }
55 v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2),
56 Immediate(s));
57 return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1),
58 std::move(v2));
59 };
60 op_a = swap(1, 0x55555555U);
61 op_a = swap(2, 0x33333333U);
62 op_a = swap(4, 0x0F0F0F0FU);
63 op_a = swap(8, 0x00FF00FFU);
64 op_a = swap(16, 0);
45 } 65 }
46 66
67 const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
68 Immediate(0), Immediate(8));
69 const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
70 Immediate(8), Immediate(8));
71 auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits);
72 SetRegister(bb, instr.gpr0, std::move(result));
73
47 return pc; 74 return pc;
48} 75}
49 76
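
The new BFE path reverses the source with the parallel ("butterfly") bit swaps before extracting the field described by op_b's packed offset/size. For reference, this is what those two steps compute, as a hedged unsigned-only sketch (the signed variant would additionally sign-extend the extracted field; offsets are assumed to be below 32):

#include <cstdint>

// Parallel bit reversal, matching the swap(1, 0x55555555) ... swap(16, 0) chain above.
uint32_t ReverseBits(uint32_t v) {
    v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1);   // swap odd/even bits
    v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2);   // swap bit pairs
    v = ((v >> 4) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4);   // swap nibbles
    v = ((v >> 8) & 0x00FF00FFu) | ((v & 0x00FF00FFu) << 8);   // swap bytes
    return (v >> 16) | (v << 16);                              // swap half-words
}

// op_b packs the field offset in bits [0,8) and the field width in bits [8,16),
// mirroring the two IBitfieldExtract nodes emitted above.
uint32_t BitfieldExtractUnsigned(uint32_t op_a, uint32_t op_b) {
    const uint32_t offset = op_b & 0xFFu;
    const uint32_t bits = (op_b >> 8) & 0xFFu;
    if (bits == 0) {
        return 0;
    }
    const uint32_t mask = bits >= 32 ? ~0u : ((1u << bits) - 1u);
    return (op_a >> offset) & mask;
}
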
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index b3dcd291c..76c56abb5 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -68,6 +68,8 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed)
68 return OperationCode::UBitwiseXor; 68 return OperationCode::UBitwiseXor;
69 case OperationCode::IBitwiseNot: 69 case OperationCode::IBitwiseNot:
70 return OperationCode::UBitwiseNot; 70 return OperationCode::UBitwiseNot;
71 case OperationCode::IBitfieldExtract:
72 return OperationCode::UBitfieldExtract;
71 case OperationCode::IBitfieldInsert: 73 case OperationCode::IBitfieldInsert:
72 return OperationCode::UBitfieldInsert; 74 return OperationCode::UBitfieldInsert;
73 case OperationCode::IBitCount: 75 case OperationCode::IBitCount:
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp
new file mode 100644
index 000000000..22a933761
--- /dev/null
+++ b/src/video_core/shader/transform_feedback.cpp
@@ -0,0 +1,115 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <unordered_map>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/shader/registry.h"
13#include "video_core/shader/transform_feedback.h"
14
15namespace VideoCommon::Shader {
16
17namespace {
18
19using Maxwell = Tegra::Engines::Maxwell3D::Regs;
20
21// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20
22
23/// Attribute offsets that describe a vector
24constexpr std::array VECTORS = {
25 28, // gl_Position
26 32, // Generic 0
27 36, // Generic 1
28 40, // Generic 2
29 44, // Generic 3
30 48, // Generic 4
31 52, // Generic 5
32 56, // Generic 6
33 60, // Generic 7
34 64, // Generic 8
35 68, // Generic 9
36 72, // Generic 10
37 76, // Generic 11
38 80, // Generic 12
39 84, // Generic 13
40 88, // Generic 14
41 92, // Generic 15
42 96, // Generic 16
43 100, // Generic 17
44 104, // Generic 18
45 108, // Generic 19
46 112, // Generic 20
47 116, // Generic 21
48 120, // Generic 22
49 124, // Generic 23
50 128, // Generic 24
51 132, // Generic 25
52 136, // Generic 26
53 140, // Generic 27
54 144, // Generic 28
55 148, // Generic 29
56 152, // Generic 30
57 156, // Generic 31
58 160, // gl_FrontColor
59 164, // gl_FrontSecondaryColor
60 160, // gl_BackColor
61 164, // gl_BackSecondaryColor
62 192, // gl_TexCoord[0]
63 196, // gl_TexCoord[1]
64 200, // gl_TexCoord[2]
65 204, // gl_TexCoord[3]
66 208, // gl_TexCoord[4]
67 212, // gl_TexCoord[5]
68 216, // gl_TexCoord[6]
69 220, // gl_TexCoord[7]
70};
71} // namespace
72
73std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) {
74
75 std::unordered_map<u8, VaryingTFB> tfb;
76
77 for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) {
78 const auto& locations = info.tfb_varying_locs[buffer];
79 const auto& layout = info.tfb_layouts[buffer];
80 const std::size_t varying_count = layout.varying_count;
81
82 std::size_t highest = 0;
83
84 for (std::size_t offset = 0; offset < varying_count; ++offset) {
85 const std::size_t base_offset = offset;
86 const u8 location = locations[offset];
87
88 VaryingTFB varying;
89 varying.buffer = layout.stream;
90 varying.stride = layout.stride;
91 varying.offset = offset * sizeof(u32);
92 varying.components = 1;
93
94 if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) {
95 UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
96
97 const u8 base_index = location / 4;
98 while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
99 ++offset;
100 ++varying.components;
101 }
102 }
103
104 [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second;
105 UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored");
106
107 highest = std::max(highest, (base_offset + varying.components) * sizeof(u32));
108 }
109
110 UNIMPLEMENTED_IF(highest != layout.stride);
111 }
112 return tfb;
113}
114
115} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h
new file mode 100644
index 000000000..77d05f64c
--- /dev/null
+++ b/src/video_core/shader/transform_feedback.h
@@ -0,0 +1,23 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_map>
8
9#include "common/common_types.h"
10#include "video_core/shader/registry.h"
11
12namespace VideoCommon::Shader {
13
14struct VaryingTFB {
15 std::size_t buffer;
16 std::size_t stride;
17 std::size_t offset;
18 std::size_t components;
19};
20
21std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info);
22
23} // namespace VideoCommon::Shader
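
BuildTransformFeedback flattens the TFB layouts and varying location arrays from the Maxwell registers into a map keyed by packed attribute location. A hypothetical consumer (everything except BuildTransformFeedback, GraphicsInfo and VaryingTFB is illustrative) would use it roughly like this:

#include <unordered_map>

#include "video_core/shader/registry.h"
#include "video_core/shader/transform_feedback.h"

using namespace VideoCommon::Shader;

// Hypothetical helper: look up the TFB slot backing generic attribute 0 (.x),
// which lives at packed location 32 according to the VECTORS table above.
void InspectXfb(const GraphicsInfo& info) {
    const std::unordered_map<u8, VaryingTFB> tfb = BuildTransformFeedback(info);
    if (const auto it = tfb.find(32); it != tfb.end()) {
        const VaryingTFB& varying = it->second;
        // varying.buffer/stride/offset feed the XfbBuffer/XfbStride/Offset
        // decorations emitted by the SPIR-V decompiler.
        (void)varying;
    }
}
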
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 9707c353d..cc7181229 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -111,6 +111,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
111 return PixelFormat::RGBA16F; 111 return PixelFormat::RGBA16F;
112 case Tegra::RenderTargetFormat::RGBA16_UNORM: 112 case Tegra::RenderTargetFormat::RGBA16_UNORM:
113 return PixelFormat::RGBA16U; 113 return PixelFormat::RGBA16U;
114 case Tegra::RenderTargetFormat::RGBA16_SNORM:
115 return PixelFormat::RGBA16S;
114 case Tegra::RenderTargetFormat::RGBA16_UINT: 116 case Tegra::RenderTargetFormat::RGBA16_UINT:
115 return PixelFormat::RGBA16UI; 117 return PixelFormat::RGBA16UI;
116 case Tegra::RenderTargetFormat::RGBA32_FLOAT: 118 case Tegra::RenderTargetFormat::RGBA32_FLOAT:
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index d88109e5a..ae8817465 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -25,82 +25,83 @@ enum class PixelFormat {
25 R8UI = 7, 25 R8UI = 7,
26 RGBA16F = 8, 26 RGBA16F = 8,
27 RGBA16U = 9, 27 RGBA16U = 9,
28 RGBA16UI = 10, 28 RGBA16S = 10,
29 R11FG11FB10F = 11, 29 RGBA16UI = 11,
30 RGBA32UI = 12, 30 R11FG11FB10F = 12,
31 DXT1 = 13, 31 RGBA32UI = 13,
32 DXT23 = 14, 32 DXT1 = 14,
33 DXT45 = 15, 33 DXT23 = 15,
34 DXN1 = 16, // This is also known as BC4 34 DXT45 = 16,
35 DXN2UNORM = 17, 35 DXN1 = 17, // This is also known as BC4
36 DXN2SNORM = 18, 36 DXN2UNORM = 18,
37 BC7U = 19, 37 DXN2SNORM = 19,
38 BC6H_UF16 = 20, 38 BC7U = 20,
39 BC6H_SF16 = 21, 39 BC6H_UF16 = 21,
40 ASTC_2D_4X4 = 22, 40 BC6H_SF16 = 22,
41 BGRA8 = 23, 41 ASTC_2D_4X4 = 23,
42 RGBA32F = 24, 42 BGRA8 = 24,
43 RG32F = 25, 43 RGBA32F = 25,
44 R32F = 26, 44 RG32F = 26,
45 R16F = 27, 45 R32F = 27,
46 R16U = 28, 46 R16F = 28,
47 R16S = 29, 47 R16U = 29,
48 R16UI = 30, 48 R16S = 30,
49 R16I = 31, 49 R16UI = 31,
50 RG16 = 32, 50 R16I = 32,
51 RG16F = 33, 51 RG16 = 33,
52 RG16UI = 34, 52 RG16F = 34,
53 RG16I = 35, 53 RG16UI = 35,
54 RG16S = 36, 54 RG16I = 36,
55 RGB32F = 37, 55 RG16S = 37,
56 RGBA8_SRGB = 38, 56 RGB32F = 38,
57 RG8U = 39, 57 RGBA8_SRGB = 39,
58 RG8S = 40, 58 RG8U = 40,
59 RG32UI = 41, 59 RG8S = 41,
60 RGBX16F = 42, 60 RG32UI = 42,
61 R32UI = 43, 61 RGBX16F = 43,
62 R32I = 44, 62 R32UI = 44,
63 ASTC_2D_8X8 = 45, 63 R32I = 45,
64 ASTC_2D_8X5 = 46, 64 ASTC_2D_8X8 = 46,
65 ASTC_2D_5X4 = 47, 65 ASTC_2D_8X5 = 47,
66 BGRA8_SRGB = 48, 66 ASTC_2D_5X4 = 48,
67 DXT1_SRGB = 49, 67 BGRA8_SRGB = 49,
68 DXT23_SRGB = 50, 68 DXT1_SRGB = 50,
69 DXT45_SRGB = 51, 69 DXT23_SRGB = 51,
70 BC7U_SRGB = 52, 70 DXT45_SRGB = 52,
71 R4G4B4A4U = 53, 71 BC7U_SRGB = 53,
72 ASTC_2D_4X4_SRGB = 54, 72 R4G4B4A4U = 54,
73 ASTC_2D_8X8_SRGB = 55, 73 ASTC_2D_4X4_SRGB = 55,
74 ASTC_2D_8X5_SRGB = 56, 74 ASTC_2D_8X8_SRGB = 56,
75 ASTC_2D_5X4_SRGB = 57, 75 ASTC_2D_8X5_SRGB = 57,
76 ASTC_2D_5X5 = 58, 76 ASTC_2D_5X4_SRGB = 58,
77 ASTC_2D_5X5_SRGB = 59, 77 ASTC_2D_5X5 = 59,
78 ASTC_2D_10X8 = 60, 78 ASTC_2D_5X5_SRGB = 60,
79 ASTC_2D_10X8_SRGB = 61, 79 ASTC_2D_10X8 = 61,
80 ASTC_2D_6X6 = 62, 80 ASTC_2D_10X8_SRGB = 62,
81 ASTC_2D_6X6_SRGB = 63, 81 ASTC_2D_6X6 = 63,
82 ASTC_2D_10X10 = 64, 82 ASTC_2D_6X6_SRGB = 64,
83 ASTC_2D_10X10_SRGB = 65, 83 ASTC_2D_10X10 = 65,
84 ASTC_2D_12X12 = 66, 84 ASTC_2D_10X10_SRGB = 66,
85 ASTC_2D_12X12_SRGB = 67, 85 ASTC_2D_12X12 = 67,
86 ASTC_2D_8X6 = 68, 86 ASTC_2D_12X12_SRGB = 68,
87 ASTC_2D_8X6_SRGB = 69, 87 ASTC_2D_8X6 = 69,
88 ASTC_2D_6X5 = 70, 88 ASTC_2D_8X6_SRGB = 70,
89 ASTC_2D_6X5_SRGB = 71, 89 ASTC_2D_6X5 = 71,
90 E5B9G9R9F = 72, 90 ASTC_2D_6X5_SRGB = 72,
91 E5B9G9R9F = 73,
91 92
92 MaxColorFormat, 93 MaxColorFormat,
93 94
94 // Depth formats 95 // Depth formats
95 Z32F = 73, 96 Z32F = 74,
96 Z16 = 74, 97 Z16 = 75,
97 98
98 MaxDepthFormat, 99 MaxDepthFormat,
99 100
100 // DepthStencil formats 101 // DepthStencil formats
101 Z24S8 = 75, 102 Z24S8 = 76,
102 S8Z24 = 76, 103 S8Z24 = 77,
103 Z32FS8 = 77, 104 Z32FS8 = 78,
104 105
105 MaxDepthStencilFormat, 106 MaxDepthStencilFormat,
106 107
@@ -138,6 +139,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
138 0, // R8UI 139 0, // R8UI
139 0, // RGBA16F 140 0, // RGBA16F
140 0, // RGBA16U 141 0, // RGBA16U
142 0, // RGBA16S
141 0, // RGBA16UI 143 0, // RGBA16UI
142 0, // R11FG11FB10F 144 0, // R11FG11FB10F
143 0, // RGBA32UI 145 0, // RGBA32UI
@@ -235,6 +237,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
235 1, // R8UI 237 1, // R8UI
236 1, // RGBA16F 238 1, // RGBA16F
237 1, // RGBA16U 239 1, // RGBA16U
240 1, // RGBA16S
238 1, // RGBA16UI 241 1, // RGBA16UI
239 1, // R11FG11FB10F 242 1, // R11FG11FB10F
240 1, // RGBA32UI 243 1, // RGBA32UI
@@ -324,6 +327,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
324 1, // R8UI 327 1, // R8UI
325 1, // RGBA16F 328 1, // RGBA16F
326 1, // RGBA16U 329 1, // RGBA16U
330 1, // RGBA16S
327 1, // RGBA16UI 331 1, // RGBA16UI
328 1, // R11FG11FB10F 332 1, // R11FG11FB10F
329 1, // RGBA32UI 333 1, // RGBA32UI
@@ -413,6 +417,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
413 8, // R8UI 417 8, // R8UI
414 64, // RGBA16F 418 64, // RGBA16F
415 64, // RGBA16U 419 64, // RGBA16U
420 64, // RGBA16S
416 64, // RGBA16UI 421 64, // RGBA16UI
417 32, // R11FG11FB10F 422 32, // R11FG11FB10F
418 128, // RGBA32UI 423 128, // RGBA32UI
@@ -517,6 +522,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
517 SurfaceCompression::None, // R8UI 522 SurfaceCompression::None, // R8UI
518 SurfaceCompression::None, // RGBA16F 523 SurfaceCompression::None, // RGBA16F
519 SurfaceCompression::None, // RGBA16U 524 SurfaceCompression::None, // RGBA16U
525 SurfaceCompression::None, // RGBA16S
520 SurfaceCompression::None, // RGBA16UI 526 SurfaceCompression::None, // RGBA16UI
521 SurfaceCompression::None, // R11FG11FB10F 527 SurfaceCompression::None, // R11FG11FB10F
522 SurfaceCompression::None, // RGBA32UI 528 SurfaceCompression::None, // RGBA32UI
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index cc3ad8417..e151c26c4 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -41,7 +41,7 @@ struct Table {
41 ComponentType alpha_component; 41 ComponentType alpha_component;
42 bool is_srgb; 42 bool is_srgb;
43}; 43};
44constexpr std::array<Table, 75> DefinitionTable = {{ 44constexpr std::array<Table, 76> DefinitionTable = {{
45 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, 45 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
46 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, 46 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
47 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, 47 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
@@ -61,6 +61,7 @@ constexpr std::array<Table, 75> DefinitionTable = {{
61 {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U}, 61 {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U},
62 {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S}, 62 {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S},
63 63
64 {TextureFormat::R16_G16_B16_A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RGBA16S},
64 {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U}, 65 {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U},
65 {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F}, 66 {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F},
66 {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI}, 67 {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI},
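For context on the new RGBA16S row: each entry of the definition table pairs a guest texture format and its per-component types with a host PixelFormat, so the SNORM variant of R16_G16_B16_A16 only needs one extra row. The trimmed-down types and the Find() helper below are illustrative stand-ins, not yuzu's FormatLookupTable interface:

    #include <array>

    enum class TextureFormat { A8R8G8B8, R16_G16_B16_A16 };
    enum class ComponentType { SNORM, UNORM, SINT, UINT, FLOAT };
    enum class PixelFormat { ABGR8U, RGBA16S, RGBA16U, RGBA16F, Invalid };

    struct Row {
        TextureFormat format;
        ComponentType red, green, blue, alpha;
        bool is_srgb;
        PixelFormat pixel_format;
    };

    constexpr std::array<Row, 4> DefinitionTable = {{
        {TextureFormat::A8R8G8B8, ComponentType::UNORM, ComponentType::UNORM,
         ComponentType::UNORM, ComponentType::UNORM, false, PixelFormat::ABGR8U},
        {TextureFormat::R16_G16_B16_A16, ComponentType::SNORM, ComponentType::SNORM,
         ComponentType::SNORM, ComponentType::SNORM, false, PixelFormat::RGBA16S},
        {TextureFormat::R16_G16_B16_A16, ComponentType::UNORM, ComponentType::UNORM,
         ComponentType::UNORM, ComponentType::UNORM, false, PixelFormat::RGBA16U},
        {TextureFormat::R16_G16_B16_A16, ComponentType::FLOAT, ComponentType::FLOAT,
         ComponentType::FLOAT, ComponentType::FLOAT, false, PixelFormat::RGBA16F},
    }};

    // Linear scan over the small table; this mirrors the idea of the lookup,
    // not yuzu's exact search strategy.
    constexpr PixelFormat Find(TextureFormat format, ComponentType r, ComponentType g,
                               ComponentType b, ComponentType a, bool is_srgb) {
        for (const Row& row : DefinitionTable) {
            if (row.format == format && row.red == r && row.green == g &&
                row.blue == b && row.alpha == a && row.is_srgb == is_srgb) {
                return row.pixel_format;
            }
        }
        return PixelFormat::Invalid;
    }

    static_assert(Find(TextureFormat::R16_G16_B16_A16, ComponentType::SNORM,
                       ComponentType::SNORM, ComponentType::SNORM,
                       ComponentType::SNORM, false) == PixelFormat::RGBA16S);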
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index f00839313..9931c5ef7 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -113,8 +113,10 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
113 params.height = tic.Height(); 113 params.height = tic.Height();
114 params.depth = tic.Depth(); 114 params.depth = tic.Depth();
115 params.pitch = params.is_tiled ? 0 : tic.Pitch(); 115 params.pitch = params.is_tiled ? 0 : tic.Pitch();
116 if (params.target == SurfaceTarget::TextureCubemap || 116 if (params.target == SurfaceTarget::Texture2D && params.depth > 1) {
117 params.target == SurfaceTarget::TextureCubeArray) { 117 params.depth = 1;
118 } else if (params.target == SurfaceTarget::TextureCubemap ||
119 params.target == SurfaceTarget::TextureCubeArray) {
118 params.depth *= 6; 120 params.depth *= 6;
119 } 121 }
120 params.num_levels = tic.max_mip_level + 1; 122 params.num_levels = tic.max_mip_level + 1;
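The surface_params.cpp change normalizes the depth coming from the TIC: a plain 2D texture must not keep a stray depth value, while cube targets store their six faces as array layers. A standalone sketch with assumed names (NormalizeDepth and this SurfaceTarget enum are illustrative, not the real SurfaceParams API):

    enum class SurfaceTarget { Texture2D, Texture3D, TextureCubemap, TextureCubeArray };

    constexpr unsigned NormalizeDepth(SurfaceTarget target, unsigned tic_depth) {
        if (target == SurfaceTarget::Texture2D && tic_depth > 1) {
            return 1; // ignore a bogus depth on plain 2D textures
        }
        if (target == SurfaceTarget::TextureCubemap ||
            target == SurfaceTarget::TextureCubeArray) {
            return tic_depth * 6; // six faces per cube, per array layer
        }
        return tic_depth;
    }

    static_assert(NormalizeDepth(SurfaceTarget::Texture2D, 4) == 1);
    static_assert(NormalizeDepth(SurfaceTarget::TextureCubemap, 1) == 6);
    static_assert(NormalizeDepth(SurfaceTarget::TextureCubeArray, 3) == 18);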
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 51373b687..6cdbe63d0 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -104,6 +104,11 @@ public:
104 if (!cache_addr) { 104 if (!cache_addr) {
105 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 105 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
106 } 106 }
107
108 if (!IsTypeCompatible(tic.texture_type, entry)) {
109 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
110 }
111
107 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; 112 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
108 const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); 113 const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false);
109 if (guard_samplers) { 114 if (guard_samplers) {
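The hunk above adds an early-out to texture lookup: when the descriptor's texture type and the shader sampler's declared type disagree, the cache binds a null surface instead of creating a mismatched one. A condensed sketch of that guard pattern, with hypothetical types and helpers rather than yuzu's GetTextureSurface:

    #include <cassert>

    enum class TicType { Texture2D, Texture3D };
    enum class SamplerType { Texture2D, Texture3D };

    struct Surface {
        bool is_null = false;
    };

    Surface GetNullSurface() {
        return {true};
    }

    Surface CreateSurfaceFor(TicType) {
        return {false};
    }

    Surface GetTextureSurface(TicType tic_type, SamplerType sampler_type) {
        const bool compatible =
            (tic_type == TicType::Texture2D && sampler_type == SamplerType::Texture2D) ||
            (tic_type == TicType::Texture3D && sampler_type == SamplerType::Texture3D);
        if (!compatible) {
            // Fall back to a placeholder instead of creating a surface of the wrong type.
            return GetNullSurface();
        }
        return CreateSurfaceFor(tic_type);
    }

    int main() {
        assert(GetTextureSurface(TicType::Texture3D, SamplerType::Texture2D).is_null);
        assert(!GetTextureSurface(TicType::Texture2D, SamplerType::Texture2D).is_null);
    }

The full compatibility rules live in the IsTypeCompatible helper added in the last hunk of this file.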
@@ -914,13 +919,15 @@ private:
914 params.width = 1; 919 params.width = 1;
915 params.height = 1; 920 params.height = 1;
916 params.depth = 1; 921 params.depth = 1;
922 if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) {
923 params.depth = 6;
924 }
917 params.pitch = 4; 925 params.pitch = 4;
918 params.num_levels = 1; 926 params.num_levels = 1;
919 params.emulated_levels = 1; 927 params.emulated_levels = 1;
920 params.pixel_format = VideoCore::Surface::PixelFormat::RGBA16F; 928 params.pixel_format = VideoCore::Surface::PixelFormat::R8U;
921 params.type = VideoCore::Surface::SurfaceType::ColorTexture; 929 params.type = VideoCore::Surface::SurfaceType::ColorTexture;
922 auto surface = CreateSurface(0ULL, params); 930 auto surface = CreateSurface(0ULL, params);
923 invalid_memory.clear();
924 invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); 931 invalid_memory.resize(surface->GetHostSizeInBytes(), 0U);
925 surface->UploadTexture(invalid_memory); 932 surface->UploadTexture(invalid_memory);
926 surface->MarkAsModified(false, Tick()); 933 surface->MarkAsModified(false, Tick());
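The null-surface hunk above switches the placeholder to R8U and gives cube targets a depth of 6, so the fallback is legal to bind as a cubemap while staying tiny. A rough sketch of the resulting backing size, with illustrative names rather than yuzu's SurfaceParams API:

    #include <cstddef>

    enum class SurfaceTarget { Texture2D, TextureCubemap, TextureCubeArray };

    // One 1x1 texel per layer; cube targets need six layers (one per face).
    constexpr std::size_t NullSurfaceSizeInBytes(SurfaceTarget target,
                                                 std::size_t bytes_per_pixel) {
        const std::size_t depth = (target == SurfaceTarget::TextureCubemap ||
                                   target == SurfaceTarget::TextureCubeArray)
                                      ? 6
                                      : 1;
        return std::size_t{1} * 1 * depth * bytes_per_pixel; // width * height * depth * bpp
    }

    static_assert(NullSurfaceSizeInBytes(SurfaceTarget::TextureCubemap, 1) == 6);
    static_assert(NullSurfaceSizeInBytes(SurfaceTarget::Texture2D, 1) == 1);

With one byte per texel, the zero-filled invalid_memory buffer only needs 6 bytes for a cube null surface, compared with 8 bytes per texel under the previous RGBA16F format.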
@@ -1082,6 +1089,36 @@ private:
1082 return siblings_table[static_cast<std::size_t>(format)]; 1089 return siblings_table[static_cast<std::size_t>(format)];
1083 } 1090 }
1084 1091
1092     /// Returns true if the shader sampler entry is compatible with the TIC texture type.
1093 static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type,
1094 const VideoCommon::Shader::Sampler& entry) {
1095 const auto shader_type = entry.GetType();
1096 switch (tic_type) {
1097 case Tegra::Texture::TextureType::Texture1D:
1098 case Tegra::Texture::TextureType::Texture1DArray:
1099 return shader_type == Tegra::Shader::TextureType::Texture1D;
1100 case Tegra::Texture::TextureType::Texture1DBuffer:
1101             // TODO(Rodrigo): Assume it is valid for now
1102 return true;
1103 case Tegra::Texture::TextureType::Texture2D:
1104 case Tegra::Texture::TextureType::Texture2DNoMipmap:
1105 return shader_type == Tegra::Shader::TextureType::Texture2D;
1106 case Tegra::Texture::TextureType::Texture2DArray:
1107 return shader_type == Tegra::Shader::TextureType::Texture2D ||
1108 shader_type == Tegra::Shader::TextureType::TextureCube;
1109 case Tegra::Texture::TextureType::Texture3D:
1110 return shader_type == Tegra::Shader::TextureType::Texture3D;
1111 case Tegra::Texture::TextureType::TextureCubeArray:
1112 case Tegra::Texture::TextureType::TextureCubemap:
1113 if (shader_type == Tegra::Shader::TextureType::TextureCube) {
1114 return true;
1115 }
1116 return shader_type == Tegra::Shader::TextureType::Texture2D && entry.IsArray();
1117 }
1118 UNREACHABLE();
1119 return true;
1120 }
1121
1085 struct FramebufferTargetInfo { 1122 struct FramebufferTargetInfo {
1086 TSurface target; 1123 TSurface target;
1087 TView view; 1124 TView view;