diff options
Diffstat (limited to 'src')
32 files changed, 1018 insertions, 391 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 1f621fb1f..fbebed715 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -83,6 +83,8 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 83 | "${VIDEO_CORE}/shader/shader_ir.cpp" | 83 | "${VIDEO_CORE}/shader/shader_ir.cpp" |
| 84 | "${VIDEO_CORE}/shader/shader_ir.h" | 84 | "${VIDEO_CORE}/shader/shader_ir.h" |
| 85 | "${VIDEO_CORE}/shader/track.cpp" | 85 | "${VIDEO_CORE}/shader/track.cpp" |
| 86 | "${VIDEO_CORE}/shader/transform_feedback.cpp" | ||
| 87 | "${VIDEO_CORE}/shader/transform_feedback.h" | ||
| 86 | # and also check that the scm_rev files haven't changed | 88 | # and also check that the scm_rev files haven't changed |
| 87 | "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" | 89 | "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" |
| 88 | "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" | 90 | "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" |
diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp index 69b7abc54..566b57b62 100644 --- a/src/common/page_table.cpp +++ b/src/common/page_table.cpp | |||
| @@ -16,7 +16,6 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) { | |||
| 16 | 16 | ||
| 17 | pointers.resize(num_page_table_entries); | 17 | pointers.resize(num_page_table_entries); |
| 18 | attributes.resize(num_page_table_entries); | 18 | attributes.resize(num_page_table_entries); |
| 19 | backing_addr.resize(num_page_table_entries); | ||
| 20 | 19 | ||
| 21 | // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the | 20 | // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the |
| 22 | // vector size is subsequently decreased (via resize), the vector might not automatically | 21 | // vector size is subsequently decreased (via resize), the vector might not automatically |
| @@ -25,6 +24,17 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) { | |||
| 25 | 24 | ||
| 26 | pointers.shrink_to_fit(); | 25 | pointers.shrink_to_fit(); |
| 27 | attributes.shrink_to_fit(); | 26 | attributes.shrink_to_fit(); |
| 27 | } | ||
| 28 | |||
| 29 | BackingPageTable::BackingPageTable(std::size_t page_size_in_bits) : PageTable{page_size_in_bits} {} | ||
| 30 | |||
| 31 | BackingPageTable::~BackingPageTable() = default; | ||
| 32 | |||
| 33 | void BackingPageTable::Resize(std::size_t address_space_width_in_bits) { | ||
| 34 | PageTable::Resize(address_space_width_in_bits); | ||
| 35 | const std::size_t num_page_table_entries = 1ULL | ||
| 36 | << (address_space_width_in_bits - page_size_in_bits); | ||
| 37 | backing_addr.resize(num_page_table_entries); | ||
| 28 | backing_addr.shrink_to_fit(); | 38 | backing_addr.shrink_to_fit(); |
| 29 | } | 39 | } |
| 30 | 40 | ||
diff --git a/src/common/page_table.h b/src/common/page_table.h index 8b8ff0bb8..dbc272ab7 100644 --- a/src/common/page_table.h +++ b/src/common/page_table.h | |||
| @@ -76,9 +76,20 @@ struct PageTable { | |||
| 76 | */ | 76 | */ |
| 77 | std::vector<PageType> attributes; | 77 | std::vector<PageType> attributes; |
| 78 | 78 | ||
| 79 | std::vector<u64> backing_addr; | ||
| 80 | |||
| 81 | const std::size_t page_size_in_bits{}; | 79 | const std::size_t page_size_in_bits{}; |
| 82 | }; | 80 | }; |
| 83 | 81 | ||
| 82 | /** | ||
| 83 | * A more advanced Page Table with the ability to save a backing address when using it | ||
| 84 | * depends on another MMU. | ||
| 85 | */ | ||
| 86 | struct BackingPageTable : PageTable { | ||
| 87 | explicit BackingPageTable(std::size_t page_size_in_bits); | ||
| 88 | ~BackingPageTable(); | ||
| 89 | |||
| 90 | void Resize(std::size_t address_space_width_in_bits); | ||
| 91 | |||
| 92 | std::vector<u64> backing_addr; | ||
| 93 | }; | ||
| 94 | |||
| 84 | } // namespace Common | 95 | } // namespace Common |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 0101e5f0e..91df062d7 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -129,6 +129,8 @@ add_library(video_core STATIC | |||
| 129 | shader/shader_ir.cpp | 129 | shader/shader_ir.cpp |
| 130 | shader/shader_ir.h | 130 | shader/shader_ir.h |
| 131 | shader/track.cpp | 131 | shader/track.cpp |
| 132 | shader/transform_feedback.cpp | ||
| 133 | shader/transform_feedback.h | ||
| 132 | surface.cpp | 134 | surface.cpp |
| 133 | surface.h | 135 | surface.h |
| 134 | texture_cache/format_lookup_table.cpp | 136 | texture_cache/format_lookup_table.cpp |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 8752a1cfb..8a9e9992e 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -628,19 +628,26 @@ public: | |||
| 628 | float depth_range_far; | 628 | float depth_range_far; |
| 629 | }; | 629 | }; |
| 630 | 630 | ||
| 631 | struct alignas(32) TransformFeedbackBinding { | 631 | struct TransformFeedbackBinding { |
| 632 | u32 buffer_enable; | 632 | u32 buffer_enable; |
| 633 | u32 address_high; | 633 | u32 address_high; |
| 634 | u32 address_low; | 634 | u32 address_low; |
| 635 | s32 buffer_size; | 635 | s32 buffer_size; |
| 636 | s32 buffer_offset; | 636 | s32 buffer_offset; |
| 637 | INSERT_UNION_PADDING_WORDS(3); | ||
| 638 | |||
| 639 | GPUVAddr Address() const { | ||
| 640 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 641 | address_low); | ||
| 642 | } | ||
| 637 | }; | 643 | }; |
| 638 | static_assert(sizeof(TransformFeedbackBinding) == 32); | 644 | static_assert(sizeof(TransformFeedbackBinding) == 32); |
| 639 | 645 | ||
| 640 | struct alignas(16) TransformFeedbackLayout { | 646 | struct TransformFeedbackLayout { |
| 641 | u32 stream; | 647 | u32 stream; |
| 642 | u32 varying_count; | 648 | u32 varying_count; |
| 643 | u32 stride; | 649 | u32 stride; |
| 650 | INSERT_UNION_PADDING_WORDS(1); | ||
| 644 | }; | 651 | }; |
| 645 | static_assert(sizeof(TransformFeedbackLayout) == 16); | 652 | static_assert(sizeof(TransformFeedbackLayout) == 16); |
| 646 | 653 | ||
| @@ -652,6 +659,10 @@ public: | |||
| 652 | return shader_config[index].enable != 0; | 659 | return shader_config[index].enable != 0; |
| 653 | } | 660 | } |
| 654 | 661 | ||
| 662 | bool IsShaderConfigEnabled(Regs::ShaderProgram type) const { | ||
| 663 | return IsShaderConfigEnabled(static_cast<std::size_t>(type)); | ||
| 664 | } | ||
| 665 | |||
| 655 | union { | 666 | union { |
| 656 | struct { | 667 | struct { |
| 657 | INSERT_UNION_PADDING_WORDS(0x45); | 668 | INSERT_UNION_PADDING_WORDS(0x45); |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index c9bc83cd7..eba42deb4 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -911,14 +911,9 @@ union Instruction { | |||
| 911 | } fadd32i; | 911 | } fadd32i; |
| 912 | 912 | ||
| 913 | union { | 913 | union { |
| 914 | BitField<20, 8, u64> shift_position; | 914 | BitField<40, 1, u64> brev; |
| 915 | BitField<28, 8, u64> shift_length; | 915 | BitField<47, 1, u64> rd_cc; |
| 916 | BitField<48, 1, u64> negate_b; | 916 | BitField<48, 1, u64> is_signed; |
| 917 | BitField<49, 1, u64> negate_a; | ||
| 918 | |||
| 919 | u64 GetLeftShiftValue() const { | ||
| 920 | return 32 - (shift_position + shift_length); | ||
| 921 | } | ||
| 922 | } bfe; | 917 | } bfe; |
| 923 | 918 | ||
| 924 | union { | 919 | union { |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index ba8c9d665..64acb17df 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -39,6 +39,7 @@ enum class RenderTargetFormat : u32 { | |||
| 39 | RGBA32_FLOAT = 0xC0, | 39 | RGBA32_FLOAT = 0xC0, |
| 40 | RGBA32_UINT = 0xC2, | 40 | RGBA32_UINT = 0xC2, |
| 41 | RGBA16_UNORM = 0xC6, | 41 | RGBA16_UNORM = 0xC6, |
| 42 | RGBA16_SNORM = 0xC7, | ||
| 42 | RGBA16_UINT = 0xC9, | 43 | RGBA16_UINT = 0xC9, |
| 43 | RGBA16_FLOAT = 0xCA, | 44 | RGBA16_FLOAT = 0xCA, |
| 44 | RG32_FLOAT = 0xCB, | 45 | RG32_FLOAT = 0xCB, |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index aea010087..073bdb491 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -174,7 +174,7 @@ private: | |||
| 174 | /// End of address space, based on address space in bits. | 174 | /// End of address space, based on address space in bits. |
| 175 | static constexpr GPUVAddr address_space_end{1ULL << address_space_width}; | 175 | static constexpr GPUVAddr address_space_end{1ULL << address_space_width}; |
| 176 | 176 | ||
| 177 | Common::PageTable page_table{page_bits}; | 177 | Common::BackingPageTable page_table{page_bits}; |
| 178 | VMAMap vma_map; | 178 | VMAMap vma_map; |
| 179 | VideoCore::RasterizerInterface& rasterizer; | 179 | VideoCore::RasterizerInterface& rasterizer; |
| 180 | 180 | ||
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index f2c83266e..6d522c318 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp | |||
| @@ -51,6 +51,7 @@ static constexpr ConversionArray morton_to_linear_fns = { | |||
| 51 | MortonCopy<true, PixelFormat::R8UI>, | 51 | MortonCopy<true, PixelFormat::R8UI>, |
| 52 | MortonCopy<true, PixelFormat::RGBA16F>, | 52 | MortonCopy<true, PixelFormat::RGBA16F>, |
| 53 | MortonCopy<true, PixelFormat::RGBA16U>, | 53 | MortonCopy<true, PixelFormat::RGBA16U>, |
| 54 | MortonCopy<true, PixelFormat::RGBA16S>, | ||
| 54 | MortonCopy<true, PixelFormat::RGBA16UI>, | 55 | MortonCopy<true, PixelFormat::RGBA16UI>, |
| 55 | MortonCopy<true, PixelFormat::R11FG11FB10F>, | 56 | MortonCopy<true, PixelFormat::R11FG11FB10F>, |
| 56 | MortonCopy<true, PixelFormat::RGBA32UI>, | 57 | MortonCopy<true, PixelFormat::RGBA32UI>, |
| @@ -131,6 +132,7 @@ static constexpr ConversionArray linear_to_morton_fns = { | |||
| 131 | MortonCopy<false, PixelFormat::R8U>, | 132 | MortonCopy<false, PixelFormat::R8U>, |
| 132 | MortonCopy<false, PixelFormat::R8UI>, | 133 | MortonCopy<false, PixelFormat::R8UI>, |
| 133 | MortonCopy<false, PixelFormat::RGBA16F>, | 134 | MortonCopy<false, PixelFormat::RGBA16F>, |
| 135 | MortonCopy<false, PixelFormat::RGBA16S>, | ||
| 134 | MortonCopy<false, PixelFormat::RGBA16U>, | 136 | MortonCopy<false, PixelFormat::RGBA16U>, |
| 135 | MortonCopy<false, PixelFormat::RGBA16UI>, | 137 | MortonCopy<false, PixelFormat::RGBA16UI>, |
| 136 | MortonCopy<false, PixelFormat::R11FG11FB10F>, | 138 | MortonCopy<false, PixelFormat::R11FG11FB10F>, |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8a2db8e36..1af4268a4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -496,7 +496,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 496 | SyncCullMode(); | 496 | SyncCullMode(); |
| 497 | SyncPrimitiveRestart(); | 497 | SyncPrimitiveRestart(); |
| 498 | SyncScissorTest(); | 498 | SyncScissorTest(); |
| 499 | SyncTransformFeedback(); | ||
| 500 | SyncPointState(); | 499 | SyncPointState(); |
| 501 | SyncPolygonOffset(); | 500 | SyncPolygonOffset(); |
| 502 | SyncAlphaTest(); | 501 | SyncAlphaTest(); |
| @@ -569,7 +568,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 569 | glTextureBarrier(); | 568 | glTextureBarrier(); |
| 570 | } | 569 | } |
| 571 | 570 | ||
| 572 | ++num_queued_commands; | 571 | BeginTransformFeedback(primitive_mode); |
| 573 | 572 | ||
| 574 | const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); | 573 | const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); |
| 575 | const GLsizei num_instances = | 574 | const GLsizei num_instances = |
| @@ -608,6 +607,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 608 | num_instances, base_instance); | 607 | num_instances, base_instance); |
| 609 | } | 608 | } |
| 610 | } | 609 | } |
| 610 | |||
| 611 | EndTransformFeedback(); | ||
| 612 | |||
| 613 | ++num_queued_commands; | ||
| 611 | } | 614 | } |
| 612 | 615 | ||
| 613 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | 616 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { |
| @@ -1290,11 +1293,6 @@ void RasterizerOpenGL::SyncScissorTest() { | |||
| 1290 | } | 1293 | } |
| 1291 | } | 1294 | } |
| 1292 | 1295 | ||
| 1293 | void RasterizerOpenGL::SyncTransformFeedback() { | ||
| 1294 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1295 | UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented"); | ||
| 1296 | } | ||
| 1297 | |||
| 1298 | void RasterizerOpenGL::SyncPointState() { | 1296 | void RasterizerOpenGL::SyncPointState() { |
| 1299 | auto& gpu = system.GPU().Maxwell3D(); | 1297 | auto& gpu = system.GPU().Maxwell3D(); |
| 1300 | auto& flags = gpu.dirty.flags; | 1298 | auto& flags = gpu.dirty.flags; |
| @@ -1370,4 +1368,62 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { | |||
| 1370 | oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); | 1368 | oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); |
| 1371 | } | 1369 | } |
| 1372 | 1370 | ||
| 1371 | void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { | ||
| 1372 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1373 | if (regs.tfb_enabled == 0) { | ||
| 1374 | return; | ||
| 1375 | } | ||
| 1376 | |||
| 1377 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | ||
| 1378 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | ||
| 1379 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); | ||
| 1380 | |||
| 1381 | for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { | ||
| 1382 | const auto& binding = regs.tfb_bindings[index]; | ||
| 1383 | if (!binding.buffer_enable) { | ||
| 1384 | if (enabled_transform_feedback_buffers[index]) { | ||
| 1385 | glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0, | ||
| 1386 | 0); | ||
| 1387 | } | ||
| 1388 | enabled_transform_feedback_buffers[index] = false; | ||
| 1389 | continue; | ||
| 1390 | } | ||
| 1391 | enabled_transform_feedback_buffers[index] = true; | ||
| 1392 | |||
| 1393 | auto& tfb_buffer = transform_feedback_buffers[index]; | ||
| 1394 | tfb_buffer.Create(); | ||
| 1395 | |||
| 1396 | const GLuint handle = tfb_buffer.handle; | ||
| 1397 | const std::size_t size = binding.buffer_size; | ||
| 1398 | glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY); | ||
| 1399 | glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0, | ||
| 1400 | static_cast<GLsizeiptr>(size)); | ||
| 1401 | } | ||
| 1402 | |||
| 1403 | glBeginTransformFeedback(GL_POINTS); | ||
| 1404 | } | ||
| 1405 | |||
| 1406 | void RasterizerOpenGL::EndTransformFeedback() { | ||
| 1407 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1408 | if (regs.tfb_enabled == 0) { | ||
| 1409 | return; | ||
| 1410 | } | ||
| 1411 | |||
| 1412 | glEndTransformFeedback(); | ||
| 1413 | |||
| 1414 | for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { | ||
| 1415 | const auto& binding = regs.tfb_bindings[index]; | ||
| 1416 | if (!binding.buffer_enable) { | ||
| 1417 | continue; | ||
| 1418 | } | ||
| 1419 | UNIMPLEMENTED_IF(binding.buffer_offset != 0); | ||
| 1420 | |||
| 1421 | const GLuint handle = transform_feedback_buffers[index].handle; | ||
| 1422 | const GPUVAddr gpu_addr = binding.Address(); | ||
| 1423 | const std::size_t size = binding.buffer_size; | ||
| 1424 | const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | ||
| 1425 | glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); | ||
| 1426 | } | ||
| 1427 | } | ||
| 1428 | |||
| 1373 | } // namespace OpenGL | 1429 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index e6424f5d2..2d3be2437 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -168,9 +168,6 @@ private: | |||
| 168 | /// Syncs the scissor test state to match the guest state | 168 | /// Syncs the scissor test state to match the guest state |
| 169 | void SyncScissorTest(); | 169 | void SyncScissorTest(); |
| 170 | 170 | ||
| 171 | /// Syncs the transform feedback state to match the guest state | ||
| 172 | void SyncTransformFeedback(); | ||
| 173 | |||
| 174 | /// Syncs the point state to match the guest state | 171 | /// Syncs the point state to match the guest state |
| 175 | void SyncPointState(); | 172 | void SyncPointState(); |
| 176 | 173 | ||
| @@ -192,6 +189,12 @@ private: | |||
| 192 | /// Syncs the framebuffer sRGB state to match the guest state | 189 | /// Syncs the framebuffer sRGB state to match the guest state |
| 193 | void SyncFramebufferSRGB(); | 190 | void SyncFramebufferSRGB(); |
| 194 | 191 | ||
| 192 | /// Begin a transform feedback | ||
| 193 | void BeginTransformFeedback(GLenum primitive_mode); | ||
| 194 | |||
| 195 | /// End a transform feedback | ||
| 196 | void EndTransformFeedback(); | ||
| 197 | |||
| 195 | /// Check for extension that are not strictly required but are needed for correct emulation | 198 | /// Check for extension that are not strictly required but are needed for correct emulation |
| 196 | void CheckExtensions(); | 199 | void CheckExtensions(); |
| 197 | 200 | ||
| @@ -229,6 +232,11 @@ private: | |||
| 229 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; | 232 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; |
| 230 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; | 233 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; |
| 231 | 234 | ||
| 235 | std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | ||
| 236 | transform_feedback_buffers; | ||
| 237 | std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | ||
| 238 | enabled_transform_feedback_buffers; | ||
| 239 | |||
| 232 | /// Number of commands queued to the OpenGL driver. Reseted on flush. | 240 | /// Number of commands queued to the OpenGL driver. Reseted on flush. |
| 233 | std::size_t num_queued_commands = 0; | 241 | std::size_t num_queued_commands = 0; |
| 234 | 242 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 19d6f3dcb..849839fe3 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | #include "video_core/shader/ast.h" | 23 | #include "video_core/shader/ast.h" |
| 24 | #include "video_core/shader/node.h" | 24 | #include "video_core/shader/node.h" |
| 25 | #include "video_core/shader/shader_ir.h" | 25 | #include "video_core/shader/shader_ir.h" |
| 26 | #include "video_core/shader/transform_feedback.h" | ||
| 26 | 27 | ||
| 27 | namespace OpenGL { | 28 | namespace OpenGL { |
| 28 | 29 | ||
| @@ -36,6 +37,7 @@ using Tegra::Shader::IpaInterpMode; | |||
| 36 | using Tegra::Shader::IpaMode; | 37 | using Tegra::Shader::IpaMode; |
| 37 | using Tegra::Shader::IpaSampleMode; | 38 | using Tegra::Shader::IpaSampleMode; |
| 38 | using Tegra::Shader::Register; | 39 | using Tegra::Shader::Register; |
| 40 | using VideoCommon::Shader::BuildTransformFeedback; | ||
| 39 | using VideoCommon::Shader::Registry; | 41 | using VideoCommon::Shader::Registry; |
| 40 | 42 | ||
| 41 | using namespace std::string_literals; | 43 | using namespace std::string_literals; |
| @@ -49,6 +51,11 @@ class ExprDecompiler; | |||
| 49 | 51 | ||
| 50 | enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; | 52 | enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; |
| 51 | 53 | ||
| 54 | constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"}; | ||
| 55 | |||
| 56 | constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr"; | ||
| 57 | constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr"; | ||
| 58 | |||
| 52 | struct TextureOffset {}; | 59 | struct TextureOffset {}; |
| 53 | struct TextureDerivates {}; | 60 | struct TextureDerivates {}; |
| 54 | using TextureArgument = std::pair<Type, Node>; | 61 | using TextureArgument = std::pair<Type, Node>; |
| @@ -390,12 +397,22 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
| 390 | return stage == ShaderType::Vertex; | 397 | return stage == ShaderType::Vertex; |
| 391 | } | 398 | } |
| 392 | 399 | ||
| 400 | struct GenericVaryingDescription { | ||
| 401 | std::string name; | ||
| 402 | u8 first_element = 0; | ||
| 403 | bool is_scalar = false; | ||
| 404 | }; | ||
| 405 | |||
| 393 | class GLSLDecompiler final { | 406 | class GLSLDecompiler final { |
| 394 | public: | 407 | public: |
| 395 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, | 408 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, |
| 396 | ShaderType stage, std::string_view identifier, std::string_view suffix) | 409 | ShaderType stage, std::string_view identifier, std::string_view suffix) |
| 397 | : device{device}, ir{ir}, registry{registry}, stage{stage}, | 410 | : device{device}, ir{ir}, registry{registry}, stage{stage}, |
| 398 | identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {} | 411 | identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} { |
| 412 | if (stage != ShaderType::Compute) { | ||
| 413 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); | ||
| 414 | } | ||
| 415 | } | ||
| 399 | 416 | ||
| 400 | void Decompile() { | 417 | void Decompile() { |
| 401 | DeclareHeader(); | 418 | DeclareHeader(); |
| @@ -403,17 +420,17 @@ public: | |||
| 403 | DeclareGeometry(); | 420 | DeclareGeometry(); |
| 404 | DeclareFragment(); | 421 | DeclareFragment(); |
| 405 | DeclareCompute(); | 422 | DeclareCompute(); |
| 406 | DeclareRegisters(); | ||
| 407 | DeclareCustomVariables(); | ||
| 408 | DeclarePredicates(); | ||
| 409 | DeclareLocalMemory(); | ||
| 410 | DeclareInternalFlags(); | ||
| 411 | DeclareInputAttributes(); | 423 | DeclareInputAttributes(); |
| 412 | DeclareOutputAttributes(); | 424 | DeclareOutputAttributes(); |
| 413 | DeclareConstantBuffers(); | ||
| 414 | DeclareGlobalMemory(); | ||
| 415 | DeclareSamplers(); | ||
| 416 | DeclareImages(); | 425 | DeclareImages(); |
| 426 | DeclareSamplers(); | ||
| 427 | DeclareGlobalMemory(); | ||
| 428 | DeclareConstantBuffers(); | ||
| 429 | DeclareLocalMemory(); | ||
| 430 | DeclareRegisters(); | ||
| 431 | DeclarePredicates(); | ||
| 432 | DeclareInternalFlags(); | ||
| 433 | DeclareCustomVariables(); | ||
| 417 | DeclarePhysicalAttributeReader(); | 434 | DeclarePhysicalAttributeReader(); |
| 418 | 435 | ||
| 419 | code.AddLine("void main() {{"); | 436 | code.AddLine("void main() {{"); |
| @@ -485,7 +502,7 @@ private: | |||
| 485 | if (!identifier.empty()) { | 502 | if (!identifier.empty()) { |
| 486 | code.AddLine("// {}", identifier); | 503 | code.AddLine("// {}", identifier); |
| 487 | } | 504 | } |
| 488 | code.AddLine("#version 430 core"); | 505 | code.AddLine("#version 440 core"); |
| 489 | code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); | 506 | code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); |
| 490 | if (device.HasShaderBallot()) { | 507 | if (device.HasShaderBallot()) { |
| 491 | code.AddLine("#extension GL_ARB_shader_ballot : require"); | 508 | code.AddLine("#extension GL_ARB_shader_ballot : require"); |
| @@ -570,7 +587,13 @@ private: | |||
| 570 | code.AddLine("out gl_PerVertex {{"); | 587 | code.AddLine("out gl_PerVertex {{"); |
| 571 | ++code.scope; | 588 | ++code.scope; |
| 572 | 589 | ||
| 573 | code.AddLine("vec4 gl_Position;"); | 590 | auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position); |
| 591 | if (!pos_xfb.empty()) { | ||
| 592 | pos_xfb = fmt::format("layout ({}) ", pos_xfb); | ||
| 593 | } | ||
| 594 | const char* pos_type = | ||
| 595 | FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1); | ||
| 596 | code.AddLine("{}{} gl_Position;", pos_xfb, pos_type); | ||
| 574 | 597 | ||
| 575 | for (const auto attribute : ir.GetOutputAttributes()) { | 598 | for (const auto attribute : ir.GetOutputAttributes()) { |
| 576 | if (attribute == Attribute::Index::ClipDistances0123 || | 599 | if (attribute == Attribute::Index::ClipDistances0123 || |
| @@ -703,7 +726,7 @@ private: | |||
| 703 | void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { | 726 | void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { |
| 704 | const u32 location{GetGenericAttributeIndex(index)}; | 727 | const u32 location{GetGenericAttributeIndex(index)}; |
| 705 | 728 | ||
| 706 | std::string name{GetInputAttribute(index)}; | 729 | std::string name{GetGenericInputAttribute(index)}; |
| 707 | if (stage == ShaderType::Geometry) { | 730 | if (stage == ShaderType::Geometry) { |
| 708 | name = "gs_" + name + "[]"; | 731 | name = "gs_" + name + "[]"; |
| 709 | } | 732 | } |
| @@ -740,9 +763,59 @@ private: | |||
| 740 | } | 763 | } |
| 741 | } | 764 | } |
| 742 | 765 | ||
| 766 | std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const { | ||
| 767 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 768 | const auto it = transform_feedback.find(location); | ||
| 769 | if (it == transform_feedback.end()) { | ||
| 770 | return {}; | ||
| 771 | } | ||
| 772 | return it->second.components; | ||
| 773 | } | ||
| 774 | |||
| 775 | std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const { | ||
| 776 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 777 | const auto it = transform_feedback.find(location); | ||
| 778 | if (it == transform_feedback.end()) { | ||
| 779 | return {}; | ||
| 780 | } | ||
| 781 | |||
| 782 | const VaryingTFB& tfb = it->second; | ||
| 783 | return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer, | ||
| 784 | tfb.offset, tfb.stride); | ||
| 785 | } | ||
| 786 | |||
| 743 | void DeclareOutputAttribute(Attribute::Index index) { | 787 | void DeclareOutputAttribute(Attribute::Index index) { |
| 744 | const u32 location{GetGenericAttributeIndex(index)}; | 788 | static constexpr std::string_view swizzle = "xyzw"; |
| 745 | code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index)); | 789 | u8 element = 0; |
| 790 | while (element < 4) { | ||
| 791 | auto xfb = GetTransformFeedbackDecoration(index, element); | ||
| 792 | if (!xfb.empty()) { | ||
| 793 | xfb = fmt::format(", {}", xfb); | ||
| 794 | } | ||
| 795 | const std::size_t remainder = 4 - element; | ||
| 796 | const std::size_t num_components = GetNumComponents(index, element).value_or(remainder); | ||
| 797 | const char* const type = FLOAT_TYPES.at(num_components - 1); | ||
| 798 | |||
| 799 | const u32 location = GetGenericAttributeIndex(index); | ||
| 800 | |||
| 801 | GenericVaryingDescription description; | ||
| 802 | description.first_element = static_cast<u8>(element); | ||
| 803 | description.is_scalar = num_components == 1; | ||
| 804 | description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME); | ||
| 805 | if (element != 0 || num_components != 4) { | ||
| 806 | const std::string_view name_swizzle = swizzle.substr(element, num_components); | ||
| 807 | description.name = fmt::format("{}_{}", description.name, name_swizzle); | ||
| 808 | } | ||
| 809 | for (std::size_t i = 0; i < num_components; ++i) { | ||
| 810 | const u8 offset = static_cast<u8>(location * 4 + element + i); | ||
| 811 | varying_description.insert({offset, description}); | ||
| 812 | } | ||
| 813 | |||
| 814 | code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element, | ||
| 815 | xfb, type, description.name); | ||
| 816 | |||
| 817 | element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); | ||
| 818 | } | ||
| 746 | } | 819 | } |
| 747 | 820 | ||
| 748 | void DeclareConstantBuffers() { | 821 | void DeclareConstantBuffers() { |
| @@ -1095,7 +1168,7 @@ private: | |||
| 1095 | return {"0", Type::Int}; | 1168 | return {"0", Type::Int}; |
| 1096 | default: | 1169 | default: |
| 1097 | if (IsGenericAttribute(attribute)) { | 1170 | if (IsGenericAttribute(attribute)) { |
| 1098 | return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element), | 1171 | return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element), |
| 1099 | Type::Float}; | 1172 | Type::Float}; |
| 1100 | } | 1173 | } |
| 1101 | break; | 1174 | break; |
| @@ -1164,8 +1237,7 @@ private: | |||
| 1164 | return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}}; | 1237 | return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}}; |
| 1165 | default: | 1238 | default: |
| 1166 | if (IsGenericAttribute(attribute)) { | 1239 | if (IsGenericAttribute(attribute)) { |
| 1167 | return { | 1240 | return {{GetGenericOutputAttribute(attribute, abuf->GetElement()), Type::Float}}; |
| 1168 | {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}}; | ||
| 1169 | } | 1241 | } |
| 1170 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); | 1242 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); |
| 1171 | return {}; | 1243 | return {}; |
| @@ -1937,16 +2009,19 @@ private: | |||
| 1937 | expr += GetSampler(meta->sampler); | 2009 | expr += GetSampler(meta->sampler); |
| 1938 | expr += ", "; | 2010 | expr += ", "; |
| 1939 | 2011 | ||
| 1940 | expr += constructors.at(operation.GetOperandsCount() - 1); | 2012 | expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1); |
| 1941 | expr += '('; | 2013 | expr += '('; |
| 1942 | for (std::size_t i = 0; i < count; ++i) { | 2014 | for (std::size_t i = 0; i < count; ++i) { |
| 1943 | expr += VisitOperand(operation, i).AsInt(); | 2015 | if (i > 0) { |
| 1944 | const std::size_t next = i + 1; | ||
| 1945 | if (next == count) | ||
| 1946 | expr += ')'; | ||
| 1947 | else if (next < count) | ||
| 1948 | expr += ", "; | 2016 | expr += ", "; |
| 2017 | } | ||
| 2018 | expr += VisitOperand(operation, i).AsInt(); | ||
| 2019 | } | ||
| 2020 | if (meta->array) { | ||
| 2021 | expr += ", "; | ||
| 2022 | expr += Visit(meta->array).AsInt(); | ||
| 1949 | } | 2023 | } |
| 2024 | expr += ')'; | ||
| 1950 | 2025 | ||
| 1951 | if (meta->lod && !meta->sampler.IsBuffer()) { | 2026 | if (meta->lod && !meta->sampler.IsBuffer()) { |
| 1952 | expr += ", "; | 2027 | expr += ", "; |
| @@ -2376,27 +2451,34 @@ private: | |||
| 2376 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 2451 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 2377 | 2452 | ||
| 2378 | std::string GetRegister(u32 index) const { | 2453 | std::string GetRegister(u32 index) const { |
| 2379 | return GetDeclarationWithSuffix(index, "gpr"); | 2454 | return AppendSuffix(index, "gpr"); |
| 2380 | } | 2455 | } |
| 2381 | 2456 | ||
| 2382 | std::string GetCustomVariable(u32 index) const { | 2457 | std::string GetCustomVariable(u32 index) const { |
| 2383 | return GetDeclarationWithSuffix(index, "custom_var"); | 2458 | return AppendSuffix(index, "custom_var"); |
| 2384 | } | 2459 | } |
| 2385 | 2460 | ||
| 2386 | std::string GetPredicate(Tegra::Shader::Pred pred) const { | 2461 | std::string GetPredicate(Tegra::Shader::Pred pred) const { |
| 2387 | return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); | 2462 | return AppendSuffix(static_cast<u32>(pred), "pred"); |
| 2388 | } | 2463 | } |
| 2389 | 2464 | ||
| 2390 | std::string GetInputAttribute(Attribute::Index attribute) const { | 2465 | std::string GetGenericInputAttribute(Attribute::Index attribute) const { |
| 2391 | return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr"); | 2466 | return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME); |
| 2392 | } | 2467 | } |
| 2393 | 2468 | ||
| 2394 | std::string GetOutputAttribute(Attribute::Index attribute) const { | 2469 | std::unordered_map<u8, GenericVaryingDescription> varying_description; |
| 2395 | return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr"); | 2470 | |
| 2471 | std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const { | ||
| 2472 | const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element); | ||
| 2473 | const auto& description = varying_description.at(offset); | ||
| 2474 | if (description.is_scalar) { | ||
| 2475 | return description.name; | ||
| 2476 | } | ||
| 2477 | return fmt::format("{}[{}]", description.name, element - description.first_element); | ||
| 2396 | } | 2478 | } |
| 2397 | 2479 | ||
| 2398 | std::string GetConstBuffer(u32 index) const { | 2480 | std::string GetConstBuffer(u32 index) const { |
| 2399 | return GetDeclarationWithSuffix(index, "cbuf"); | 2481 | return AppendSuffix(index, "cbuf"); |
| 2400 | } | 2482 | } |
| 2401 | 2483 | ||
| 2402 | std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { | 2484 | std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { |
| @@ -2409,7 +2491,7 @@ private: | |||
| 2409 | } | 2491 | } |
| 2410 | 2492 | ||
| 2411 | std::string GetConstBufferBlock(u32 index) const { | 2493 | std::string GetConstBufferBlock(u32 index) const { |
| 2412 | return GetDeclarationWithSuffix(index, "cbuf_block"); | 2494 | return AppendSuffix(index, "cbuf_block"); |
| 2413 | } | 2495 | } |
| 2414 | 2496 | ||
| 2415 | std::string GetLocalMemory() const { | 2497 | std::string GetLocalMemory() const { |
| @@ -2434,14 +2516,14 @@ private: | |||
| 2434 | } | 2516 | } |
| 2435 | 2517 | ||
| 2436 | std::string GetSampler(const Sampler& sampler) const { | 2518 | std::string GetSampler(const Sampler& sampler) const { |
| 2437 | return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); | 2519 | return AppendSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); |
| 2438 | } | 2520 | } |
| 2439 | 2521 | ||
| 2440 | std::string GetImage(const Image& image) const { | 2522 | std::string GetImage(const Image& image) const { |
| 2441 | return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image"); | 2523 | return AppendSuffix(static_cast<u32>(image.GetIndex()), "image"); |
| 2442 | } | 2524 | } |
| 2443 | 2525 | ||
| 2444 | std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { | 2526 | std::string AppendSuffix(u32 index, std::string_view name) const { |
| 2445 | if (suffix.empty()) { | 2527 | if (suffix.empty()) { |
| 2446 | return fmt::format("{}{}", name, index); | 2528 | return fmt::format("{}{}", name, index); |
| 2447 | } else { | 2529 | } else { |
| @@ -2477,6 +2559,7 @@ private: | |||
| 2477 | const std::string_view identifier; | 2559 | const std::string_view identifier; |
| 2478 | const std::string_view suffix; | 2560 | const std::string_view suffix; |
| 2479 | const Header header; | 2561 | const Header header; |
| 2562 | std::unordered_map<u8, VaryingTFB> transform_feedback; | ||
| 2480 | 2563 | ||
| 2481 | ShaderWriter code; | 2564 | ShaderWriter code; |
| 2482 | 2565 | ||
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 2d3838a7a..f424e3000 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -53,6 +53,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format | |||
| 53 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI | 53 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI |
| 54 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F | 54 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F |
| 55 | {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U | 55 | {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U |
| 56 | {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S | ||
| 56 | {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI | 57 | {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI |
| 57 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F | 58 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F |
| 58 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI | 59 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 12333e8c9..fca5e3ec0 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -5,8 +5,11 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <cstddef> | 6 | #include <cstddef> |
| 7 | #include <cstdlib> | 7 | #include <cstdlib> |
| 8 | #include <cstring> | ||
| 8 | #include <memory> | 9 | #include <memory> |
| 10 | |||
| 9 | #include <glad/glad.h> | 11 | #include <glad/glad.h> |
| 12 | |||
| 10 | #include "common/assert.h" | 13 | #include "common/assert.h" |
| 11 | #include "common/logging/log.h" | 14 | #include "common/logging/log.h" |
| 12 | #include "common/microprofile.h" | 15 | #include "common/microprofile.h" |
| @@ -25,6 +28,8 @@ | |||
| 25 | 28 | ||
| 26 | namespace OpenGL { | 29 | namespace OpenGL { |
| 27 | 30 | ||
| 31 | namespace { | ||
| 32 | |||
| 28 | // If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have | 33 | // If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have |
| 29 | // to wait on available presentation frames. | 34 | // to wait on available presentation frames. |
| 30 | constexpr std::size_t SWAP_CHAIN_SIZE = 3; | 35 | constexpr std::size_t SWAP_CHAIN_SIZE = 3; |
| @@ -41,124 +46,6 @@ struct Frame { | |||
| 41 | bool is_srgb{}; /// Framebuffer is sRGB or RGB | 46 | bool is_srgb{}; /// Framebuffer is sRGB or RGB |
| 42 | }; | 47 | }; |
| 43 | 48 | ||
| 44 | /** | ||
| 45 | * For smooth Vsync rendering, we want to always present the latest frame that the core generates, | ||
| 46 | * but also make sure that rendering happens at the pace that the frontend dictates. This is a | ||
| 47 | * helper class that the renderer uses to sync frames between the render thread and the presentation | ||
| 48 | * thread | ||
| 49 | */ | ||
| 50 | class FrameMailbox { | ||
| 51 | public: | ||
| 52 | std::mutex swap_chain_lock; | ||
| 53 | std::condition_variable present_cv; | ||
| 54 | std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{}; | ||
| 55 | std::queue<Frame*> free_queue; | ||
| 56 | std::deque<Frame*> present_queue; | ||
| 57 | Frame* previous_frame{}; | ||
| 58 | |||
| 59 | FrameMailbox() { | ||
| 60 | for (auto& frame : swap_chain) { | ||
| 61 | free_queue.push(&frame); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | ~FrameMailbox() { | ||
| 66 | // lock the mutex and clear out the present and free_queues and notify any people who are | ||
| 67 | // blocked to prevent deadlock on shutdown | ||
| 68 | std::scoped_lock lock{swap_chain_lock}; | ||
| 69 | std::queue<Frame*>().swap(free_queue); | ||
| 70 | present_queue.clear(); | ||
| 71 | present_cv.notify_all(); | ||
| 72 | } | ||
| 73 | |||
| 74 | void ReloadPresentFrame(Frame* frame, u32 height, u32 width) { | ||
| 75 | frame->present.Release(); | ||
| 76 | frame->present.Create(); | ||
| 77 | GLint previous_draw_fbo{}; | ||
| 78 | glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo); | ||
| 79 | glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle); | ||
| 80 | glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, | ||
| 81 | frame->color.handle); | ||
| 82 | if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { | ||
| 83 | LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!"); | ||
| 84 | } | ||
| 85 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo); | ||
| 86 | frame->color_reloaded = false; | ||
| 87 | } | ||
| 88 | |||
| 89 | void ReloadRenderFrame(Frame* frame, u32 width, u32 height) { | ||
| 90 | // Recreate the color texture attachment | ||
| 91 | frame->color.Release(); | ||
| 92 | frame->color.Create(); | ||
| 93 | const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8; | ||
| 94 | glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height); | ||
| 95 | |||
| 96 | // Recreate the FBO for the render target | ||
| 97 | frame->render.Release(); | ||
| 98 | frame->render.Create(); | ||
| 99 | glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle); | ||
| 100 | glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, | ||
| 101 | frame->color.handle); | ||
| 102 | if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { | ||
| 103 | LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!"); | ||
| 104 | } | ||
| 105 | |||
| 106 | frame->width = width; | ||
| 107 | frame->height = height; | ||
| 108 | frame->color_reloaded = true; | ||
| 109 | } | ||
| 110 | |||
| 111 | Frame* GetRenderFrame() { | ||
| 112 | std::unique_lock lock{swap_chain_lock}; | ||
| 113 | |||
| 114 | // If theres no free frames, we will reuse the oldest render frame | ||
| 115 | if (free_queue.empty()) { | ||
| 116 | auto frame = present_queue.back(); | ||
| 117 | present_queue.pop_back(); | ||
| 118 | return frame; | ||
| 119 | } | ||
| 120 | |||
| 121 | Frame* frame = free_queue.front(); | ||
| 122 | free_queue.pop(); | ||
| 123 | return frame; | ||
| 124 | } | ||
| 125 | |||
| 126 | void ReleaseRenderFrame(Frame* frame) { | ||
| 127 | std::unique_lock lock{swap_chain_lock}; | ||
| 128 | present_queue.push_front(frame); | ||
| 129 | present_cv.notify_one(); | ||
| 130 | } | ||
| 131 | |||
| 132 | Frame* TryGetPresentFrame(int timeout_ms) { | ||
| 133 | std::unique_lock lock{swap_chain_lock}; | ||
| 134 | // wait for new entries in the present_queue | ||
| 135 | present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), | ||
| 136 | [&] { return !present_queue.empty(); }); | ||
| 137 | if (present_queue.empty()) { | ||
| 138 | // timed out waiting for a frame to draw so return the previous frame | ||
| 139 | return previous_frame; | ||
| 140 | } | ||
| 141 | |||
| 142 | // free the previous frame and add it back to the free queue | ||
| 143 | if (previous_frame) { | ||
| 144 | free_queue.push(previous_frame); | ||
| 145 | } | ||
| 146 | |||
| 147 | // the newest entries are pushed to the front of the queue | ||
| 148 | Frame* frame = present_queue.front(); | ||
| 149 | present_queue.pop_front(); | ||
| 150 | // remove all old entries from the present queue and move them back to the free_queue | ||
| 151 | for (auto f : present_queue) { | ||
| 152 | free_queue.push(f); | ||
| 153 | } | ||
| 154 | present_queue.clear(); | ||
| 155 | previous_frame = frame; | ||
| 156 | return frame; | ||
| 157 | } | ||
| 158 | }; | ||
| 159 | |||
| 160 | namespace { | ||
| 161 | |||
| 162 | constexpr char VERTEX_SHADER[] = R"( | 49 | constexpr char VERTEX_SHADER[] = R"( |
| 163 | #version 430 core | 50 | #version 430 core |
| 164 | 51 | ||
| @@ -211,6 +98,24 @@ struct ScreenRectVertex { | |||
| 211 | std::array<GLfloat, 2> tex_coord; | 98 | std::array<GLfloat, 2> tex_coord; |
| 212 | }; | 99 | }; |
| 213 | 100 | ||
| 101 | /// Returns true if any debug tool is attached | ||
| 102 | bool HasDebugTool() { | ||
| 103 | const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); | ||
| 104 | if (nsight) { | ||
| 105 | return true; | ||
| 106 | } | ||
| 107 | |||
| 108 | GLint num_extensions; | ||
| 109 | glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions); | ||
| 110 | for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) { | ||
| 111 | const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index)); | ||
| 112 | if (!std::strcmp(name, "GL_EXT_debug_tool")) { | ||
| 113 | return true; | ||
| 114 | } | ||
| 115 | } | ||
| 116 | return false; | ||
| 117 | } | ||
| 118 | |||
| 214 | /** | 119 | /** |
| 215 | * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left | 120 | * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left |
| 216 | * corner and (width, height) on the lower-bottom. | 121 | * corner and (width, height) on the lower-bottom. |
| @@ -294,6 +199,153 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit | |||
| 294 | 199 | ||
| 295 | } // Anonymous namespace | 200 | } // Anonymous namespace |
| 296 | 201 | ||
| 202 | /** | ||
| 203 | * For smooth Vsync rendering, we want to always present the latest frame that the core generates, | ||
| 204 | * but also make sure that rendering happens at the pace that the frontend dictates. This is a | ||
| 205 | * helper class that the renderer uses to sync frames between the render thread and the presentation | ||
| 206 | * thread | ||
| 207 | */ | ||
| 208 | class FrameMailbox { | ||
| 209 | public: | ||
| 210 | std::mutex swap_chain_lock; | ||
| 211 | std::condition_variable present_cv; | ||
| 212 | std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{}; | ||
| 213 | std::queue<Frame*> free_queue; | ||
| 214 | std::deque<Frame*> present_queue; | ||
| 215 | Frame* previous_frame{}; | ||
| 216 | |||
| 217 | FrameMailbox() : has_debug_tool{HasDebugTool()} { | ||
| 218 | for (auto& frame : swap_chain) { | ||
| 219 | free_queue.push(&frame); | ||
| 220 | } | ||
| 221 | } | ||
| 222 | |||
| 223 | ~FrameMailbox() { | ||
| 224 | // lock the mutex and clear out the present and free_queues and notify any people who are | ||
| 225 | // blocked to prevent deadlock on shutdown | ||
| 226 | std::scoped_lock lock{swap_chain_lock}; | ||
| 227 | std::queue<Frame*>().swap(free_queue); | ||
| 228 | present_queue.clear(); | ||
| 229 | present_cv.notify_all(); | ||
| 230 | } | ||
| 231 | |||
| 232 | void ReloadPresentFrame(Frame* frame, u32 height, u32 width) { | ||
| 233 | frame->present.Release(); | ||
| 234 | frame->present.Create(); | ||
| 235 | GLint previous_draw_fbo{}; | ||
| 236 | glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo); | ||
| 237 | glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle); | ||
| 238 | glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, | ||
| 239 | frame->color.handle); | ||
| 240 | if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { | ||
| 241 | LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!"); | ||
| 242 | } | ||
| 243 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo); | ||
| 244 | frame->color_reloaded = false; | ||
| 245 | } | ||
| 246 | |||
| 247 | void ReloadRenderFrame(Frame* frame, u32 width, u32 height) { | ||
| 248 | // Recreate the color texture attachment | ||
| 249 | frame->color.Release(); | ||
| 250 | frame->color.Create(); | ||
| 251 | const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8; | ||
| 252 | glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height); | ||
| 253 | |||
| 254 | // Recreate the FBO for the render target | ||
| 255 | frame->render.Release(); | ||
| 256 | frame->render.Create(); | ||
| 257 | glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle); | ||
| 258 | glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, | ||
| 259 | frame->color.handle); | ||
| 260 | if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { | ||
| 261 | LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!"); | ||
| 262 | } | ||
| 263 | |||
| 264 | frame->width = width; | ||
| 265 | frame->height = height; | ||
| 266 | frame->color_reloaded = true; | ||
| 267 | } | ||
| 268 | |||
| 269 | Frame* GetRenderFrame() { | ||
| 270 | std::unique_lock lock{swap_chain_lock}; | ||
| 271 | |||
| 272 | // If theres no free frames, we will reuse the oldest render frame | ||
| 273 | if (free_queue.empty()) { | ||
| 274 | auto frame = present_queue.back(); | ||
| 275 | present_queue.pop_back(); | ||
| 276 | return frame; | ||
| 277 | } | ||
| 278 | |||
| 279 | Frame* frame = free_queue.front(); | ||
| 280 | free_queue.pop(); | ||
| 281 | return frame; | ||
| 282 | } | ||
| 283 | |||
| 284 | void ReleaseRenderFrame(Frame* frame) { | ||
| 285 | std::unique_lock lock{swap_chain_lock}; | ||
| 286 | present_queue.push_front(frame); | ||
| 287 | present_cv.notify_one(); | ||
| 288 | |||
| 289 | DebugNotifyNextFrame(); | ||
| 290 | } | ||
| 291 | |||
| 292 | Frame* TryGetPresentFrame(int timeout_ms) { | ||
| 293 | DebugWaitForNextFrame(); | ||
| 294 | |||
| 295 | std::unique_lock lock{swap_chain_lock}; | ||
| 296 | // wait for new entries in the present_queue | ||
| 297 | present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), | ||
| 298 | [&] { return !present_queue.empty(); }); | ||
| 299 | if (present_queue.empty()) { | ||
| 300 | // timed out waiting for a frame to draw so return the previous frame | ||
| 301 | return previous_frame; | ||
| 302 | } | ||
| 303 | |||
| 304 | // free the previous frame and add it back to the free queue | ||
| 305 | if (previous_frame) { | ||
| 306 | free_queue.push(previous_frame); | ||
| 307 | } | ||
| 308 | |||
| 309 | // the newest entries are pushed to the front of the queue | ||
| 310 | Frame* frame = present_queue.front(); | ||
| 311 | present_queue.pop_front(); | ||
| 312 | // remove all old entries from the present queue and move them back to the free_queue | ||
| 313 | for (auto f : present_queue) { | ||
| 314 | free_queue.push(f); | ||
| 315 | } | ||
| 316 | present_queue.clear(); | ||
| 317 | previous_frame = frame; | ||
| 318 | return frame; | ||
| 319 | } | ||
| 320 | |||
| 321 | private: | ||
| 322 | std::mutex debug_synch_mutex; | ||
| 323 | std::condition_variable debug_synch_condition; | ||
| 324 | std::atomic_int frame_for_debug{}; | ||
| 325 | const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step | ||
| 326 | |||
| 327 | /// Signal that a new frame is available (called from GPU thread) | ||
| 328 | void DebugNotifyNextFrame() { | ||
| 329 | if (!has_debug_tool) { | ||
| 330 | return; | ||
| 331 | } | ||
| 332 | frame_for_debug++; | ||
| 333 | std::lock_guard lock{debug_synch_mutex}; | ||
| 334 | debug_synch_condition.notify_one(); | ||
| 335 | } | ||
| 336 | |||
| 337 | /// Wait for a new frame to be available (called from presentation thread) | ||
| 338 | void DebugWaitForNextFrame() { | ||
| 339 | if (!has_debug_tool) { | ||
| 340 | return; | ||
| 341 | } | ||
| 342 | const int last_frame = frame_for_debug; | ||
| 343 | std::unique_lock lock{debug_synch_mutex}; | ||
| 344 | debug_synch_condition.wait(lock, | ||
| 345 | [this, last_frame] { return frame_for_debug > last_frame; }); | ||
| 346 | } | ||
| 347 | }; | ||
| 348 | |||
| 297 | RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) | 349 | RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) |
| 298 | : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system}, | 350 | : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system}, |
| 299 | frame_mailbox{std::make_unique<FrameMailbox>()} {} | 351 | frame_mailbox{std::make_unique<FrameMailbox>()} {} |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index df3ac707c..0e2e5e6c7 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -125,6 +125,7 @@ struct FormatTuple { | |||
| 125 | {vk::Format::eR8Uint, Attachable | Storage}, // R8UI | 125 | {vk::Format::eR8Uint, Attachable | Storage}, // R8UI |
| 126 | {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F | 126 | {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F |
| 127 | {vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U | 127 | {vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U |
| 128 | {vk::Format::eR16G16B16A16Snorm, Attachable | Storage}, // RGBA16S | ||
| 128 | {vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI | 129 | {vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI |
| 129 | {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F | 130 | {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F |
| 130 | {vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI | 131 | {vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI |
| @@ -331,6 +332,8 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr | |||
| 331 | return vk::Format::eR16G16B16Unorm; | 332 | return vk::Format::eR16G16B16Unorm; |
| 332 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | 333 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: |
| 333 | return vk::Format::eR16G16B16A16Unorm; | 334 | return vk::Format::eR16G16B16A16Unorm; |
| 335 | case Maxwell::VertexAttribute::Size::Size_10_10_10_2: | ||
| 336 | return vk::Format::eA2B10G10R10UnormPack32; | ||
| 334 | default: | 337 | default: |
| 335 | break; | 338 | break; |
| 336 | } | 339 | } |
| @@ -364,6 +367,10 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr | |||
| 364 | return vk::Format::eR8G8B8A8Uint; | 367 | return vk::Format::eR8G8B8A8Uint; |
| 365 | case Maxwell::VertexAttribute::Size::Size_32: | 368 | case Maxwell::VertexAttribute::Size::Size_32: |
| 366 | return vk::Format::eR32Uint; | 369 | return vk::Format::eR32Uint; |
| 370 | case Maxwell::VertexAttribute::Size::Size_32_32: | ||
| 371 | return vk::Format::eR32G32Uint; | ||
| 372 | case Maxwell::VertexAttribute::Size::Size_32_32_32: | ||
| 373 | return vk::Format::eR32G32B32Uint; | ||
| 367 | case Maxwell::VertexAttribute::Size::Size_32_32_32_32: | 374 | case Maxwell::VertexAttribute::Size::Size_32_32_32_32: |
| 368 | return vk::Format::eR32G32B32A32Uint; | 375 | return vk::Format::eR32G32B32A32Uint; |
| 369 | default: | 376 | default: |
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 886bde3b9..28d2fbc4f 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -107,8 +107,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan | |||
| 107 | features.occlusionQueryPrecise = true; | 107 | features.occlusionQueryPrecise = true; |
| 108 | features.fragmentStoresAndAtomics = true; | 108 | features.fragmentStoresAndAtomics = true; |
| 109 | features.shaderImageGatherExtended = true; | 109 | features.shaderImageGatherExtended = true; |
| 110 | features.shaderStorageImageReadWithoutFormat = | 110 | features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported; |
| 111 | is_shader_storage_img_read_without_format_supported; | ||
| 112 | features.shaderStorageImageWriteWithoutFormat = true; | 111 | features.shaderStorageImageWriteWithoutFormat = true; |
| 113 | features.textureCompressionASTC_LDR = is_optimal_astc_supported; | 112 | features.textureCompressionASTC_LDR = is_optimal_astc_supported; |
| 114 | 113 | ||
| @@ -148,6 +147,15 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan | |||
| 148 | LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); | 147 | LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); |
| 149 | } | 148 | } |
| 150 | 149 | ||
| 150 | vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; | ||
| 151 | if (ext_transform_feedback) { | ||
| 152 | transform_feedback.transformFeedback = true; | ||
| 153 | transform_feedback.geometryStreams = true; | ||
| 154 | SetNext(next, transform_feedback); | ||
| 155 | } else { | ||
| 156 | LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks"); | ||
| 157 | } | ||
| 158 | |||
| 151 | if (!ext_depth_range_unrestricted) { | 159 | if (!ext_depth_range_unrestricted) { |
| 152 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); | 160 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); |
| 153 | } | 161 | } |
| @@ -385,7 +393,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 385 | } | 393 | } |
| 386 | }; | 394 | }; |
| 387 | 395 | ||
| 388 | extensions.reserve(14); | 396 | extensions.reserve(15); |
| 389 | extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); | 397 | extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); |
| 390 | extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); | 398 | extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); |
| 391 | extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); | 399 | extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); |
| @@ -397,18 +405,22 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 397 | 405 | ||
| 398 | [[maybe_unused]] const bool nsight = | 406 | [[maybe_unused]] const bool nsight = |
| 399 | std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); | 407 | std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); |
| 400 | bool khr_shader_float16_int8{}; | 408 | bool has_khr_shader_float16_int8{}; |
| 401 | bool ext_subgroup_size_control{}; | 409 | bool has_ext_subgroup_size_control{}; |
| 410 | bool has_ext_transform_feedback{}; | ||
| 402 | for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { | 411 | for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { |
| 403 | Test(extension, khr_uniform_buffer_standard_layout, | 412 | Test(extension, khr_uniform_buffer_standard_layout, |
| 404 | VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); | 413 | VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); |
| 405 | Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); | 414 | Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, |
| 415 | false); | ||
| 406 | Test(extension, ext_depth_range_unrestricted, | 416 | Test(extension, ext_depth_range_unrestricted, |
| 407 | VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); | 417 | VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); |
| 408 | Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); | 418 | Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); |
| 409 | Test(extension, ext_shader_viewport_index_layer, | 419 | Test(extension, ext_shader_viewport_index_layer, |
| 410 | VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); | 420 | VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); |
| 411 | Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, | 421 | Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, |
| 422 | false); | ||
| 423 | Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, | ||
| 412 | false); | 424 | false); |
| 413 | if (Settings::values.renderer_debug) { | 425 | if (Settings::values.renderer_debug) { |
| 414 | Test(extension, nv_device_diagnostic_checkpoints, | 426 | Test(extension, nv_device_diagnostic_checkpoints, |
| @@ -416,13 +428,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 416 | } | 428 | } |
| 417 | } | 429 | } |
| 418 | 430 | ||
| 419 | if (khr_shader_float16_int8) { | 431 | if (has_khr_shader_float16_int8) { |
| 420 | is_float16_supported = | 432 | is_float16_supported = |
| 421 | GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; | 433 | GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; |
| 422 | extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); | 434 | extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); |
| 423 | } | 435 | } |
| 424 | 436 | ||
| 425 | if (ext_subgroup_size_control) { | 437 | if (has_ext_subgroup_size_control) { |
| 426 | const auto features = | 438 | const auto features = |
| 427 | GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); | 439 | GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); |
| 428 | const auto properties = | 440 | const auto properties = |
| @@ -439,6 +451,20 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 439 | is_warp_potentially_bigger = true; | 451 | is_warp_potentially_bigger = true; |
| 440 | } | 452 | } |
| 441 | 453 | ||
| 454 | if (has_ext_transform_feedback) { | ||
| 455 | const auto features = | ||
| 456 | GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi); | ||
| 457 | const auto properties = | ||
| 458 | GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi); | ||
| 459 | |||
| 460 | if (features.transformFeedback && features.geometryStreams && | ||
| 461 | properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers && | ||
| 462 | properties.transformFeedbackQueries && properties.transformFeedbackDraw) { | ||
| 463 | extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); | ||
| 464 | ext_transform_feedback = true; | ||
| 465 | } | ||
| 466 | } | ||
| 467 | |||
| 442 | return extensions; | 468 | return extensions; |
| 443 | } | 469 | } |
| 444 | 470 | ||
| @@ -467,8 +493,7 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK | |||
| 467 | 493 | ||
| 468 | void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { | 494 | void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { |
| 469 | const auto supported_features{physical.getFeatures(dldi)}; | 495 | const auto supported_features{physical.getFeatures(dldi)}; |
| 470 | is_shader_storage_img_read_without_format_supported = | 496 | is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; |
| 471 | supported_features.shaderStorageImageReadWithoutFormat; | ||
| 472 | is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); | 497 | is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); |
| 473 | } | 498 | } |
| 474 | 499 | ||
| @@ -510,6 +535,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti | |||
| 510 | vk::Format::eR32G32Sfloat, | 535 | vk::Format::eR32G32Sfloat, |
| 511 | vk::Format::eR32G32Uint, | 536 | vk::Format::eR32G32Uint, |
| 512 | vk::Format::eR16G16B16A16Uint, | 537 | vk::Format::eR16G16B16A16Uint, |
| 538 | vk::Format::eR16G16B16A16Snorm, | ||
| 513 | vk::Format::eR16G16B16A16Unorm, | 539 | vk::Format::eR16G16B16A16Unorm, |
| 514 | vk::Format::eR16G16Unorm, | 540 | vk::Format::eR16G16Unorm, |
| 515 | vk::Format::eR16G16Snorm, | 541 | vk::Format::eR16G16Snorm, |
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 2c27ad730..6e656517f 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h | |||
| @@ -122,11 +122,6 @@ public: | |||
| 122 | return properties.limits.maxPushConstantsSize; | 122 | return properties.limits.maxPushConstantsSize; |
| 123 | } | 123 | } |
| 124 | 124 | ||
| 125 | /// Returns true if Shader storage Image Read Without Format supported. | ||
| 126 | bool IsShaderStorageImageReadWithoutFormatSupported() const { | ||
| 127 | return is_shader_storage_img_read_without_format_supported; | ||
| 128 | } | ||
| 129 | |||
| 130 | /// Returns true if ASTC is natively supported. | 125 | /// Returns true if ASTC is natively supported. |
| 131 | bool IsOptimalAstcSupported() const { | 126 | bool IsOptimalAstcSupported() const { |
| 132 | return is_optimal_astc_supported; | 127 | return is_optimal_astc_supported; |
| @@ -147,6 +142,11 @@ public: | |||
| 147 | return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; | 142 | return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; |
| 148 | } | 143 | } |
| 149 | 144 | ||
| 145 | /// Returns true if formatless image load is supported. | ||
| 146 | bool IsFormatlessImageLoadSupported() const { | ||
| 147 | return is_formatless_image_load_supported; | ||
| 148 | } | ||
| 149 | |||
| 150 | /// Returns true if the device supports VK_EXT_scalar_block_layout. | 150 | /// Returns true if the device supports VK_EXT_scalar_block_layout. |
| 151 | bool IsKhrUniformBufferStandardLayoutSupported() const { | 151 | bool IsKhrUniformBufferStandardLayoutSupported() const { |
| 152 | return khr_uniform_buffer_standard_layout; | 152 | return khr_uniform_buffer_standard_layout; |
| @@ -167,6 +167,11 @@ public: | |||
| 167 | return ext_shader_viewport_index_layer; | 167 | return ext_shader_viewport_index_layer; |
| 168 | } | 168 | } |
| 169 | 169 | ||
| 170 | /// Returns true if the device supports VK_EXT_transform_feedback. | ||
| 171 | bool IsExtTransformFeedbackSupported() const { | ||
| 172 | return ext_transform_feedback; | ||
| 173 | } | ||
| 174 | |||
| 170 | /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. | 175 | /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. |
| 171 | bool IsNvDeviceDiagnosticCheckpoints() const { | 176 | bool IsNvDeviceDiagnosticCheckpoints() const { |
| 172 | return nv_device_diagnostic_checkpoints; | 177 | return nv_device_diagnostic_checkpoints; |
| @@ -214,26 +219,26 @@ private: | |||
| 214 | static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( | 219 | static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( |
| 215 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); | 220 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); |
| 216 | 221 | ||
| 217 | const vk::PhysicalDevice physical; ///< Physical device. | 222 | const vk::PhysicalDevice physical; ///< Physical device. |
| 218 | vk::DispatchLoaderDynamic dld; ///< Device function pointers. | 223 | vk::DispatchLoaderDynamic dld; ///< Device function pointers. |
| 219 | vk::PhysicalDeviceProperties properties; ///< Device properties. | 224 | vk::PhysicalDeviceProperties properties; ///< Device properties. |
| 220 | UniqueDevice logical; ///< Logical device. | 225 | UniqueDevice logical; ///< Logical device. |
| 221 | vk::Queue graphics_queue; ///< Main graphics queue. | 226 | vk::Queue graphics_queue; ///< Main graphics queue. |
| 222 | vk::Queue present_queue; ///< Main present queue. | 227 | vk::Queue present_queue; ///< Main present queue. |
| 223 | u32 graphics_family{}; ///< Main graphics queue family index. | 228 | u32 graphics_family{}; ///< Main graphics queue family index. |
| 224 | u32 present_family{}; ///< Main present queue family index. | 229 | u32 present_family{}; ///< Main present queue family index. |
| 225 | vk::DriverIdKHR driver_id{}; ///< Driver ID. | 230 | vk::DriverIdKHR driver_id{}; ///< Driver ID. |
| 226 | vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. | 231 | vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed |
| 227 | bool is_optimal_astc_supported{}; ///< Support for native ASTC. | 232 | bool is_optimal_astc_supported{}; ///< Support for native ASTC. |
| 228 | bool is_float16_supported{}; ///< Support for float16 arithmetics. | 233 | bool is_float16_supported{}; ///< Support for float16 arithmetics. |
| 229 | bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. | 234 | bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. |
| 235 | bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. | ||
| 230 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. | 236 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. |
| 231 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. | 237 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. |
| 232 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. | 238 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. |
| 233 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. | 239 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. |
| 240 | bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. | ||
| 234 | bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. | 241 | bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. |
| 235 | bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage | ||
| 236 | ///< image read without format | ||
| 237 | 242 | ||
| 238 | // Telemetry parameters | 243 | // Telemetry parameters |
| 239 | std::string vendor_name; ///< Device's driver name. | 244 | std::string vendor_name; ///< Device's driver name. |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index ebf85f311..91e7b7791 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -179,10 +179,11 @@ Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine( | |||
| 179 | VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, | 179 | VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, |
| 180 | const VKDevice& device, VKScheduler& scheduler, | 180 | const VKDevice& device, VKScheduler& scheduler, |
| 181 | VKDescriptorPool& descriptor_pool, | 181 | VKDescriptorPool& descriptor_pool, |
| 182 | VKUpdateDescriptorQueue& update_descriptor_queue) | 182 | VKUpdateDescriptorQueue& update_descriptor_queue, |
| 183 | VKRenderPassCache& renderpass_cache) | ||
| 183 | : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, | 184 | : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, |
| 184 | descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, | 185 | descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, |
| 185 | renderpass_cache(device) {} | 186 | renderpass_cache{renderpass_cache} {} |
| 186 | 187 | ||
| 187 | VKPipelineCache::~VKPipelineCache() = default; | 188 | VKPipelineCache::~VKPipelineCache() = default; |
| 188 | 189 | ||
| @@ -273,9 +274,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | |||
| 273 | specialization.workgroup_size = key.workgroup_size; | 274 | specialization.workgroup_size = key.workgroup_size; |
| 274 | specialization.shared_memory_size = key.shared_memory_size; | 275 | specialization.shared_memory_size = key.shared_memory_size; |
| 275 | 276 | ||
| 276 | const SPIRVShader spirv_shader{ | 277 | const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, |
| 277 | Decompile(device, shader->GetIR(), ShaderType::Compute, specialization), | 278 | shader->GetRegistry(), specialization), |
| 278 | shader->GetEntries()}; | 279 | shader->GetEntries()}; |
| 279 | entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, | 280 | entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, |
| 280 | update_descriptor_queue, spirv_shader); | 281 | update_descriptor_queue, spirv_shader); |
| 281 | return *entry; | 282 | return *entry; |
| @@ -324,8 +325,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 324 | const auto& gpu = system.GPU().Maxwell3D(); | 325 | const auto& gpu = system.GPU().Maxwell3D(); |
| 325 | 326 | ||
| 326 | Specialization specialization; | 327 | Specialization specialization; |
| 327 | specialization.primitive_topology = fixed_state.input_assembly.topology; | 328 | if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) { |
| 328 | if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) { | ||
| 329 | ASSERT(fixed_state.input_assembly.point_size != 0.0f); | 329 | ASSERT(fixed_state.input_assembly.point_size != 0.0f); |
| 330 | specialization.point_size = fixed_state.input_assembly.point_size; | 330 | specialization.point_size = fixed_state.input_assembly.point_size; |
| 331 | } | 331 | } |
| @@ -333,9 +333,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 333 | specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; | 333 | specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; |
| 334 | } | 334 | } |
| 335 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; | 335 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; |
| 336 | specialization.tessellation.primitive = fixed_state.tessellation.primitive; | ||
| 337 | specialization.tessellation.spacing = fixed_state.tessellation.spacing; | ||
| 338 | specialization.tessellation.clockwise = fixed_state.tessellation.clockwise; | ||
| 339 | 336 | ||
| 340 | SPIRVProgram program; | 337 | SPIRVProgram program; |
| 341 | std::vector<vk::DescriptorSetLayoutBinding> bindings; | 338 | std::vector<vk::DescriptorSetLayoutBinding> bindings; |
| @@ -356,8 +353,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 356 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 | 353 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 |
| 357 | const auto program_type = GetShaderType(program_enum); | 354 | const auto program_type = GetShaderType(program_enum); |
| 358 | const auto& entries = shader->GetEntries(); | 355 | const auto& entries = shader->GetEntries(); |
| 359 | program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization), | 356 | program[stage] = { |
| 360 | entries}; | 357 | Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), |
| 358 | entries}; | ||
| 361 | 359 | ||
| 362 | if (program_enum == Maxwell::ShaderProgram::VertexA) { | 360 | if (program_enum == Maxwell::ShaderProgram::VertexA) { |
| 363 | // VertexB was combined with VertexA, so we skip the VertexB iteration | 361 | // VertexB was combined with VertexA, so we skip the VertexB iteration |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index e292526bb..c4c112290 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -132,6 +132,10 @@ public: | |||
| 132 | return shader_ir; | 132 | return shader_ir; |
| 133 | } | 133 | } |
| 134 | 134 | ||
| 135 | const VideoCommon::Shader::Registry& GetRegistry() const { | ||
| 136 | return registry; | ||
| 137 | } | ||
| 138 | |||
| 135 | const VideoCommon::Shader::ShaderIR& GetIR() const { | 139 | const VideoCommon::Shader::ShaderIR& GetIR() const { |
| 136 | return shader_ir; | 140 | return shader_ir; |
| 137 | } | 141 | } |
| @@ -157,7 +161,8 @@ public: | |||
| 157 | explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, | 161 | explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, |
| 158 | const VKDevice& device, VKScheduler& scheduler, | 162 | const VKDevice& device, VKScheduler& scheduler, |
| 159 | VKDescriptorPool& descriptor_pool, | 163 | VKDescriptorPool& descriptor_pool, |
| 160 | VKUpdateDescriptorQueue& update_descriptor_queue); | 164 | VKUpdateDescriptorQueue& update_descriptor_queue, |
| 165 | VKRenderPassCache& renderpass_cache); | ||
| 161 | ~VKPipelineCache(); | 166 | ~VKPipelineCache(); |
| 162 | 167 | ||
| 163 | std::array<Shader, Maxwell::MaxShaderProgram> GetShaders(); | 168 | std::array<Shader, Maxwell::MaxShaderProgram> GetShaders(); |
| @@ -180,8 +185,7 @@ private: | |||
| 180 | VKScheduler& scheduler; | 185 | VKScheduler& scheduler; |
| 181 | VKDescriptorPool& descriptor_pool; | 186 | VKDescriptorPool& descriptor_pool; |
| 182 | VKUpdateDescriptorQueue& update_descriptor_queue; | 187 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 183 | 188 | VKRenderPassCache& renderpass_cache; | |
| 184 | VKRenderPassCache renderpass_cache; | ||
| 185 | 189 | ||
| 186 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | 190 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; |
| 187 | 191 | ||
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 2bcb17b56..755aad643 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -287,12 +287,13 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind | |||
| 287 | screen_info{screen_info}, device{device}, resource_manager{resource_manager}, | 287 | screen_info{screen_info}, device{device}, resource_manager{resource_manager}, |
| 288 | memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler}, | 288 | memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler}, |
| 289 | staging_pool(device, memory_manager, scheduler), descriptor_pool(device), | 289 | staging_pool(device, memory_manager, scheduler), descriptor_pool(device), |
| 290 | update_descriptor_queue(device, scheduler), | 290 | update_descriptor_queue(device, scheduler), renderpass_cache(device), |
| 291 | quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | 291 | quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), |
| 292 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | 292 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), |
| 293 | texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, | 293 | texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, |
| 294 | staging_pool), | 294 | staging_pool), |
| 295 | pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), | 295 | pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue, |
| 296 | renderpass_cache), | ||
| 296 | buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), | 297 | buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), |
| 297 | sampler_cache(device), query_cache(system, *this, device, scheduler) { | 298 | sampler_cache(device), query_cache(system, *this, device, scheduler) { |
| 298 | scheduler.SetQueryCache(query_cache); | 299 | scheduler.SetQueryCache(query_cache); |
| @@ -347,6 +348,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 347 | [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); | 348 | [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); |
| 348 | } | 349 | } |
| 349 | 350 | ||
| 351 | BeginTransformFeedback(); | ||
| 352 | |||
| 350 | const auto pipeline_layout = pipeline.GetLayout(); | 353 | const auto pipeline_layout = pipeline.GetLayout(); |
| 351 | const auto descriptor_set = pipeline.CommitDescriptorSet(); | 354 | const auto descriptor_set = pipeline.CommitDescriptorSet(); |
| 352 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { | 355 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { |
| @@ -356,18 +359,23 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 356 | } | 359 | } |
| 357 | draw_params.Draw(cmdbuf, dld); | 360 | draw_params.Draw(cmdbuf, dld); |
| 358 | }); | 361 | }); |
| 362 | |||
| 363 | EndTransformFeedback(); | ||
| 359 | } | 364 | } |
| 360 | 365 | ||
| 361 | void RasterizerVulkan::Clear() { | 366 | void RasterizerVulkan::Clear() { |
| 362 | MICROPROFILE_SCOPE(Vulkan_Clearing); | 367 | MICROPROFILE_SCOPE(Vulkan_Clearing); |
| 363 | 368 | ||
| 364 | query_cache.UpdateCounters(); | ||
| 365 | |||
| 366 | const auto& gpu = system.GPU().Maxwell3D(); | 369 | const auto& gpu = system.GPU().Maxwell3D(); |
| 367 | if (!system.GPU().Maxwell3D().ShouldExecute()) { | 370 | if (!system.GPU().Maxwell3D().ShouldExecute()) { |
| 368 | return; | 371 | return; |
| 369 | } | 372 | } |
| 370 | 373 | ||
| 374 | sampled_views.clear(); | ||
| 375 | image_views.clear(); | ||
| 376 | |||
| 377 | query_cache.UpdateCounters(); | ||
| 378 | |||
| 371 | const auto& regs = gpu.regs; | 379 | const auto& regs = gpu.regs; |
| 372 | const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || | 380 | const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || |
| 373 | regs.clear_buffers.A; | 381 | regs.clear_buffers.A; |
| @@ -376,52 +384,54 @@ void RasterizerVulkan::Clear() { | |||
| 376 | if (!use_color && !use_depth && !use_stencil) { | 384 | if (!use_color && !use_depth && !use_stencil) { |
| 377 | return; | 385 | return; |
| 378 | } | 386 | } |
| 379 | // Clearing images requires to be out of a renderpass | ||
| 380 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 381 | 387 | ||
| 382 | // TODO(Rodrigo): Implement clears rendering a quad or using beginning a renderpass. | 388 | [[maybe_unused]] const auto texceptions = UpdateAttachments(); |
| 389 | DEBUG_ASSERT(texceptions.none()); | ||
| 390 | SetupImageTransitions(0, color_attachments, zeta_attachment); | ||
| 383 | 391 | ||
| 384 | if (use_color) { | 392 | const vk::RenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); |
| 385 | View color_view; | 393 | const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); |
| 386 | { | 394 | scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr}); |
| 387 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); | 395 | |
| 388 | color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false); | 396 | const auto& scissor = regs.scissor_test[0]; |
| 389 | } | 397 | const vk::Offset2D scissor_offset(scissor.min_x, scissor.min_y); |
| 398 | vk::Extent2D scissor_extent{scissor.max_x - scissor.min_x, scissor.max_y - scissor.min_y}; | ||
| 399 | scissor_extent.width = std::min(scissor_extent.width, render_area.width); | ||
| 400 | scissor_extent.height = std::min(scissor_extent.height, render_area.height); | ||
| 390 | 401 | ||
| 391 | color_view->Transition(vk::ImageLayout::eTransferDstOptimal, | 402 | const u32 layer = regs.clear_buffers.layer; |
| 392 | vk::PipelineStageFlagBits::eTransfer, | 403 | const vk::ClearRect clear_rect({scissor_offset, scissor_extent}, layer, 1); |
| 393 | vk::AccessFlagBits::eTransferWrite); | ||
| 394 | 404 | ||
| 405 | if (use_color) { | ||
| 395 | const std::array clear_color = {regs.clear_color[0], regs.clear_color[1], | 406 | const std::array clear_color = {regs.clear_color[0], regs.clear_color[1], |
| 396 | regs.clear_color[2], regs.clear_color[3]}; | 407 | regs.clear_color[2], regs.clear_color[3]}; |
| 397 | const vk::ClearColorValue clear(clear_color); | 408 | const vk::ClearValue clear_value{clear_color}; |
| 398 | scheduler.Record([image = color_view->GetImage(), | 409 | const u32 color_attachment = regs.clear_buffers.RT; |
| 399 | subresource = color_view->GetImageSubresourceRange(), | 410 | scheduler.Record([color_attachment, clear_value, clear_rect](auto cmdbuf, auto& dld) { |
| 400 | clear](auto cmdbuf, auto& dld) { | 411 | const vk::ClearAttachment attachment(vk::ImageAspectFlagBits::eColor, color_attachment, |
| 401 | cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource, | 412 | clear_value); |
| 402 | dld); | 413 | cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld); |
| 403 | }); | 414 | }); |
| 404 | } | 415 | } |
| 405 | if (use_depth || use_stencil) { | ||
| 406 | View zeta_surface; | ||
| 407 | { | ||
| 408 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); | ||
| 409 | zeta_surface = texture_cache.GetDepthBufferSurface(false); | ||
| 410 | } | ||
| 411 | 416 | ||
| 412 | zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal, | 417 | if (!use_depth && !use_stencil) { |
| 413 | vk::PipelineStageFlagBits::eTransfer, | 418 | return; |
| 414 | vk::AccessFlagBits::eTransferWrite); | 419 | } |
| 415 | 420 | vk::ImageAspectFlags aspect_flags; | |
| 416 | const vk::ClearDepthStencilValue clear(regs.clear_depth, | 421 | if (use_depth) { |
| 417 | static_cast<u32>(regs.clear_stencil)); | 422 | aspect_flags |= vk::ImageAspectFlagBits::eDepth; |
| 418 | scheduler.Record([image = zeta_surface->GetImage(), | ||
| 419 | subresource = zeta_surface->GetImageSubresourceRange(), | ||
| 420 | clear](auto cmdbuf, auto& dld) { | ||
| 421 | cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear, | ||
| 422 | subresource, dld); | ||
| 423 | }); | ||
| 424 | } | 423 | } |
| 424 | if (use_stencil) { | ||
| 425 | aspect_flags |= vk::ImageAspectFlagBits::eStencil; | ||
| 426 | } | ||
| 427 | |||
| 428 | scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, | ||
| 429 | clear_rect, aspect_flags](auto cmdbuf, auto& dld) { | ||
| 430 | const vk::ClearDepthStencilValue clear_zeta(clear_depth, clear_stencil); | ||
| 431 | const vk::ClearValue clear_value{clear_zeta}; | ||
| 432 | const vk::ClearAttachment attachment(aspect_flags, 0, clear_value); | ||
| 433 | cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld); | ||
| 434 | }); | ||
| 425 | } | 435 | } |
| 426 | 436 | ||
| 427 | void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | 437 | void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { |
| @@ -738,6 +748,44 @@ void RasterizerVulkan::UpdateDynamicStates() { | |||
| 738 | UpdateStencilFaces(regs); | 748 | UpdateStencilFaces(regs); |
| 739 | } | 749 | } |
| 740 | 750 | ||
| 751 | void RasterizerVulkan::BeginTransformFeedback() { | ||
| 752 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 753 | if (regs.tfb_enabled == 0) { | ||
| 754 | return; | ||
| 755 | } | ||
| 756 | |||
| 757 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | ||
| 758 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | ||
| 759 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); | ||
| 760 | |||
| 761 | UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable); | ||
| 762 | UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable); | ||
| 763 | UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable); | ||
| 764 | |||
| 765 | const auto& binding = regs.tfb_bindings[0]; | ||
| 766 | UNIMPLEMENTED_IF(binding.buffer_enable == 0); | ||
| 767 | UNIMPLEMENTED_IF(binding.buffer_offset != 0); | ||
| 768 | |||
| 769 | const GPUVAddr gpu_addr = binding.Address(); | ||
| 770 | const std::size_t size = binding.buffer_size; | ||
| 771 | const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | ||
| 772 | |||
| 773 | scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) { | ||
| 774 | cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld); | ||
| 775 | cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld); | ||
| 776 | }); | ||
| 777 | } | ||
| 778 | |||
| 779 | void RasterizerVulkan::EndTransformFeedback() { | ||
| 780 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 781 | if (regs.tfb_enabled == 0) { | ||
| 782 | return; | ||
| 783 | } | ||
| 784 | |||
| 785 | scheduler.Record( | ||
| 786 | [](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); }); | ||
| 787 | } | ||
| 788 | |||
| 741 | void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, | 789 | void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, |
| 742 | BufferBindings& buffer_bindings) { | 790 | BufferBindings& buffer_bindings) { |
| 743 | const auto& regs = system.GPU().Maxwell3D().regs; | 791 | const auto& regs = system.GPU().Maxwell3D().regs; |
| @@ -1109,7 +1157,7 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { | |||
| 1109 | // This implementation assumes that all attributes are used in the shader. | 1157 | // This implementation assumes that all attributes are used in the shader. |
| 1110 | const GPUVAddr start{regs.vertex_array[index].StartAddress()}; | 1158 | const GPUVAddr start{regs.vertex_array[index].StartAddress()}; |
| 1111 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | 1159 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; |
| 1112 | DEBUG_ASSERT(end > start); | 1160 | DEBUG_ASSERT(end >= start); |
| 1113 | 1161 | ||
| 1114 | size += (end - start + 1) * regs.vertex_array[index].enable; | 1162 | size += (end - start + 1) * regs.vertex_array[index].enable; |
| 1115 | } | 1163 | } |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 96ea05f0a..3185868e9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -169,6 +169,10 @@ private: | |||
| 169 | 169 | ||
| 170 | void UpdateDynamicStates(); | 170 | void UpdateDynamicStates(); |
| 171 | 171 | ||
| 172 | void BeginTransformFeedback(); | ||
| 173 | |||
| 174 | void EndTransformFeedback(); | ||
| 175 | |||
| 172 | bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); | 176 | bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); |
| 173 | 177 | ||
| 174 | void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, | 178 | void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, |
| @@ -249,6 +253,7 @@ private: | |||
| 249 | VKStagingBufferPool staging_pool; | 253 | VKStagingBufferPool staging_pool; |
| 250 | VKDescriptorPool descriptor_pool; | 254 | VKDescriptorPool descriptor_pool; |
| 251 | VKUpdateDescriptorQueue update_descriptor_queue; | 255 | VKUpdateDescriptorQueue update_descriptor_queue; |
| 256 | VKRenderPassCache renderpass_cache; | ||
| 252 | QuadArrayPass quad_array_pass; | 257 | QuadArrayPass quad_array_pass; |
| 253 | Uint8Pass uint8_pass; | 258 | Uint8Pass uint8_pass; |
| 254 | 259 | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index cfcca5af0..51ecb5567 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -5,7 +5,9 @@ | |||
| 5 | #include <functional> | 5 | #include <functional> |
| 6 | #include <limits> | 6 | #include <limits> |
| 7 | #include <map> | 7 | #include <map> |
| 8 | #include <optional> | ||
| 8 | #include <type_traits> | 9 | #include <type_traits> |
| 10 | #include <unordered_map> | ||
| 9 | #include <utility> | 11 | #include <utility> |
| 10 | 12 | ||
| 11 | #include <fmt/format.h> | 13 | #include <fmt/format.h> |
| @@ -24,6 +26,7 @@ | |||
| 24 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 26 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 25 | #include "video_core/shader/node.h" | 27 | #include "video_core/shader/node.h" |
| 26 | #include "video_core/shader/shader_ir.h" | 28 | #include "video_core/shader/shader_ir.h" |
| 29 | #include "video_core/shader/transform_feedback.h" | ||
| 27 | 30 | ||
| 28 | namespace Vulkan { | 31 | namespace Vulkan { |
| 29 | 32 | ||
| @@ -93,6 +96,12 @@ struct VertexIndices { | |||
| 93 | std::optional<u32> clip_distances; | 96 | std::optional<u32> clip_distances; |
| 94 | }; | 97 | }; |
| 95 | 98 | ||
| 99 | struct GenericVaryingDescription { | ||
| 100 | Id id = nullptr; | ||
| 101 | u32 first_element = 0; | ||
| 102 | bool is_scalar = false; | ||
| 103 | }; | ||
| 104 | |||
| 96 | spv::Dim GetSamplerDim(const Sampler& sampler) { | 105 | spv::Dim GetSamplerDim(const Sampler& sampler) { |
| 97 | ASSERT(!sampler.IsBuffer()); | 106 | ASSERT(!sampler.IsBuffer()); |
| 98 | switch (sampler.GetType()) { | 107 | switch (sampler.GetType()) { |
| @@ -266,9 +275,13 @@ bool IsPrecise(Operation operand) { | |||
| 266 | class SPIRVDecompiler final : public Sirit::Module { | 275 | class SPIRVDecompiler final : public Sirit::Module { |
| 267 | public: | 276 | public: |
| 268 | explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, | 277 | explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, |
| 269 | const Specialization& specialization) | 278 | const Registry& registry, const Specialization& specialization) |
| 270 | : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()}, | 279 | : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()}, |
| 271 | specialization{specialization} { | 280 | registry{registry}, specialization{specialization} { |
| 281 | if (stage != ShaderType::Compute) { | ||
| 282 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); | ||
| 283 | } | ||
| 284 | |||
| 272 | AddCapability(spv::Capability::Shader); | 285 | AddCapability(spv::Capability::Shader); |
| 273 | AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); | 286 | AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); |
| 274 | AddCapability(spv::Capability::ImageQuery); | 287 | AddCapability(spv::Capability::ImageQuery); |
| @@ -286,6 +299,15 @@ public: | |||
| 286 | AddExtension("SPV_KHR_variable_pointers"); | 299 | AddExtension("SPV_KHR_variable_pointers"); |
| 287 | AddExtension("SPV_KHR_shader_draw_parameters"); | 300 | AddExtension("SPV_KHR_shader_draw_parameters"); |
| 288 | 301 | ||
| 302 | if (!transform_feedback.empty()) { | ||
| 303 | if (device.IsExtTransformFeedbackSupported()) { | ||
| 304 | AddCapability(spv::Capability::TransformFeedback); | ||
| 305 | } else { | ||
| 306 | LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not " | ||
| 307 | "supported on this device"); | ||
| 308 | } | ||
| 309 | } | ||
| 310 | |||
| 289 | if (ir.UsesLayer() || ir.UsesViewportIndex()) { | 311 | if (ir.UsesLayer() || ir.UsesViewportIndex()) { |
| 290 | if (ir.UsesViewportIndex()) { | 312 | if (ir.UsesViewportIndex()) { |
| 291 | AddCapability(spv::Capability::MultiViewport); | 313 | AddCapability(spv::Capability::MultiViewport); |
| @@ -296,7 +318,7 @@ public: | |||
| 296 | } | 318 | } |
| 297 | } | 319 | } |
| 298 | 320 | ||
| 299 | if (device.IsShaderStorageImageReadWithoutFormatSupported()) { | 321 | if (device.IsFormatlessImageLoadSupported()) { |
| 300 | AddCapability(spv::Capability::StorageImageReadWithoutFormat); | 322 | AddCapability(spv::Capability::StorageImageReadWithoutFormat); |
| 301 | } | 323 | } |
| 302 | 324 | ||
| @@ -318,25 +340,29 @@ public: | |||
| 318 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, | 340 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, |
| 319 | header.common2.threads_per_input_primitive); | 341 | header.common2.threads_per_input_primitive); |
| 320 | break; | 342 | break; |
| 321 | case ShaderType::TesselationEval: | 343 | case ShaderType::TesselationEval: { |
| 344 | const auto& info = registry.GetGraphicsInfo(); | ||
| 322 | AddCapability(spv::Capability::Tessellation); | 345 | AddCapability(spv::Capability::Tessellation); |
| 323 | AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); | 346 | AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); |
| 324 | AddExecutionMode(main, GetExecutionMode(specialization.tessellation.primitive)); | 347 | AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive)); |
| 325 | AddExecutionMode(main, GetExecutionMode(specialization.tessellation.spacing)); | 348 | AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing)); |
| 326 | AddExecutionMode(main, specialization.tessellation.clockwise | 349 | AddExecutionMode(main, info.tessellation_clockwise |
| 327 | ? spv::ExecutionMode::VertexOrderCw | 350 | ? spv::ExecutionMode::VertexOrderCw |
| 328 | : spv::ExecutionMode::VertexOrderCcw); | 351 | : spv::ExecutionMode::VertexOrderCcw); |
| 329 | break; | 352 | break; |
| 330 | case ShaderType::Geometry: | 353 | } |
| 354 | case ShaderType::Geometry: { | ||
| 355 | const auto& info = registry.GetGraphicsInfo(); | ||
| 331 | AddCapability(spv::Capability::Geometry); | 356 | AddCapability(spv::Capability::Geometry); |
| 332 | AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); | 357 | AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); |
| 333 | AddExecutionMode(main, GetExecutionMode(specialization.primitive_topology)); | 358 | AddExecutionMode(main, GetExecutionMode(info.primitive_topology)); |
| 334 | AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); | 359 | AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); |
| 335 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, | 360 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, |
| 336 | header.common4.max_output_vertices); | 361 | header.common4.max_output_vertices); |
| 337 | // TODO(Rodrigo): Where can we get this info from? | 362 | // TODO(Rodrigo): Where can we get this info from? |
| 338 | AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); | 363 | AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); |
| 339 | break; | 364 | break; |
| 365 | } | ||
| 340 | case ShaderType::Fragment: | 366 | case ShaderType::Fragment: |
| 341 | AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); | 367 | AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); |
| 342 | AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); | 368 | AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); |
| @@ -545,7 +571,8 @@ private: | |||
| 545 | if (stage != ShaderType::Geometry) { | 571 | if (stage != ShaderType::Geometry) { |
| 546 | return; | 572 | return; |
| 547 | } | 573 | } |
| 548 | const u32 num_input = GetNumPrimitiveTopologyVertices(specialization.primitive_topology); | 574 | const auto& info = registry.GetGraphicsInfo(); |
| 575 | const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology); | ||
| 549 | DeclareInputVertexArray(num_input); | 576 | DeclareInputVertexArray(num_input); |
| 550 | DeclareOutputVertex(); | 577 | DeclareOutputVertex(); |
| 551 | } | 578 | } |
| @@ -742,12 +769,34 @@ private: | |||
| 742 | } | 769 | } |
| 743 | 770 | ||
| 744 | void DeclareOutputAttributes() { | 771 | void DeclareOutputAttributes() { |
| 772 | if (stage == ShaderType::Compute || stage == ShaderType::Fragment) { | ||
| 773 | return; | ||
| 774 | } | ||
| 775 | |||
| 776 | UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex); | ||
| 745 | for (const auto index : ir.GetOutputAttributes()) { | 777 | for (const auto index : ir.GetOutputAttributes()) { |
| 746 | if (!IsGenericAttribute(index)) { | 778 | if (!IsGenericAttribute(index)) { |
| 747 | continue; | 779 | continue; |
| 748 | } | 780 | } |
| 749 | const u32 location = GetGenericAttributeLocation(index); | 781 | DeclareOutputAttribute(index); |
| 750 | Id type = t_float4; | 782 | } |
| 783 | } | ||
| 784 | |||
| 785 | void DeclareOutputAttribute(Attribute::Index index) { | ||
| 786 | static constexpr std::string_view swizzle = "xyzw"; | ||
| 787 | |||
| 788 | const u32 location = GetGenericAttributeLocation(index); | ||
| 789 | u8 element = 0; | ||
| 790 | while (element < 4) { | ||
| 791 | const std::size_t remainder = 4 - element; | ||
| 792 | |||
| 793 | std::size_t num_components = remainder; | ||
| 794 | const std::optional tfb = GetTransformFeedbackInfo(index, element); | ||
| 795 | if (tfb) { | ||
| 796 | num_components = tfb->components; | ||
| 797 | } | ||
| 798 | |||
| 799 | Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1); | ||
| 751 | Id varying_default = v_varying_default; | 800 | Id varying_default = v_varying_default; |
| 752 | if (IsOutputAttributeArray()) { | 801 | if (IsOutputAttributeArray()) { |
| 753 | const u32 num = GetNumOutputVertices(); | 802 | const u32 num = GetNumOutputVertices(); |
| @@ -760,13 +809,45 @@ private: | |||
| 760 | } | 809 | } |
| 761 | type = TypePointer(spv::StorageClass::Output, type); | 810 | type = TypePointer(spv::StorageClass::Output, type); |
| 762 | 811 | ||
| 812 | std::string name = fmt::format("out_attr{}", location); | ||
| 813 | if (num_components < 4 || element > 0) { | ||
| 814 | name = fmt::format("{}_{}", name, swizzle.substr(element, num_components)); | ||
| 815 | } | ||
| 816 | |||
| 763 | const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); | 817 | const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); |
| 764 | Name(AddGlobalVariable(id), fmt::format("out_attr{}", location)); | 818 | Name(AddGlobalVariable(id), name); |
| 765 | output_attributes.emplace(index, id); | 819 | |
| 820 | GenericVaryingDescription description; | ||
| 821 | description.id = id; | ||
| 822 | description.first_element = element; | ||
| 823 | description.is_scalar = num_components == 1; | ||
| 824 | for (u32 i = 0; i < num_components; ++i) { | ||
| 825 | const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i); | ||
| 826 | output_attributes.emplace(offset, description); | ||
| 827 | } | ||
| 766 | interfaces.push_back(id); | 828 | interfaces.push_back(id); |
| 767 | 829 | ||
| 768 | Decorate(id, spv::Decoration::Location, location); | 830 | Decorate(id, spv::Decoration::Location, location); |
| 831 | if (element > 0) { | ||
| 832 | Decorate(id, spv::Decoration::Component, static_cast<u32>(element)); | ||
| 833 | } | ||
| 834 | if (tfb && device.IsExtTransformFeedbackSupported()) { | ||
| 835 | Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer)); | ||
| 836 | Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride)); | ||
| 837 | Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset)); | ||
| 838 | } | ||
| 839 | |||
| 840 | element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); | ||
| 841 | } | ||
| 842 | } | ||
| 843 | |||
| 844 | std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) { | ||
| 845 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 846 | const auto it = transform_feedback.find(location); | ||
| 847 | if (it == transform_feedback.end()) { | ||
| 848 | return {}; | ||
| 769 | } | 849 | } |
| 850 | return it->second; | ||
| 770 | } | 851 | } |
| 771 | 852 | ||
| 772 | u32 DeclareConstantBuffers(u32 binding) { | 853 | u32 DeclareConstantBuffers(u32 binding) { |
| @@ -898,7 +979,7 @@ private: | |||
| 898 | u32 GetNumInputVertices() const { | 979 | u32 GetNumInputVertices() const { |
| 899 | switch (stage) { | 980 | switch (stage) { |
| 900 | case ShaderType::Geometry: | 981 | case ShaderType::Geometry: |
| 901 | return GetNumPrimitiveTopologyVertices(specialization.primitive_topology); | 982 | return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology); |
| 902 | case ShaderType::TesselationControl: | 983 | case ShaderType::TesselationControl: |
| 903 | case ShaderType::TesselationEval: | 984 | case ShaderType::TesselationEval: |
| 904 | return NumInputPatches; | 985 | return NumInputPatches; |
| @@ -1346,8 +1427,14 @@ private: | |||
| 1346 | } | 1427 | } |
| 1347 | default: | 1428 | default: |
| 1348 | if (IsGenericAttribute(attribute)) { | 1429 | if (IsGenericAttribute(attribute)) { |
| 1349 | const Id composite = output_attributes.at(attribute); | 1430 | const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element); |
| 1350 | return {ArrayPass(t_out_float, composite, {element}), Type::Float}; | 1431 | const GenericVaryingDescription description = output_attributes.at(offset); |
| 1432 | const Id composite = description.id; | ||
| 1433 | std::vector<u32> indices; | ||
| 1434 | if (!description.is_scalar) { | ||
| 1435 | indices.push_back(element - description.first_element); | ||
| 1436 | } | ||
| 1437 | return {ArrayPass(t_out_float, composite, indices), Type::Float}; | ||
| 1351 | } | 1438 | } |
| 1352 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", | 1439 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", |
| 1353 | static_cast<u32>(attribute)); | 1440 | static_cast<u32>(attribute)); |
| @@ -1793,7 +1880,7 @@ private: | |||
| 1793 | } | 1880 | } |
| 1794 | 1881 | ||
| 1795 | Expression ImageLoad(Operation operation) { | 1882 | Expression ImageLoad(Operation operation) { |
| 1796 | if (!device.IsShaderStorageImageReadWithoutFormatSupported()) { | 1883 | if (!device.IsFormatlessImageLoadSupported()) { |
| 1797 | return {v_float_zero, Type::Float}; | 1884 | return {v_float_zero, Type::Float}; |
| 1798 | } | 1885 | } |
| 1799 | 1886 | ||
| @@ -2258,11 +2345,11 @@ private: | |||
| 2258 | std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { | 2345 | std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { |
| 2259 | switch (type) { | 2346 | switch (type) { |
| 2260 | case Type::Float: | 2347 | case Type::Float: |
| 2261 | return {nullptr, t_float2, t_float3, t_float4}; | 2348 | return {t_float, t_float2, t_float3, t_float4}; |
| 2262 | case Type::Int: | 2349 | case Type::Int: |
| 2263 | return {nullptr, t_int2, t_int3, t_int4}; | 2350 | return {t_int, t_int2, t_int3, t_int4}; |
| 2264 | case Type::Uint: | 2351 | case Type::Uint: |
| 2265 | return {nullptr, t_uint2, t_uint3, t_uint4}; | 2352 | return {t_uint, t_uint2, t_uint3, t_uint4}; |
| 2266 | default: | 2353 | default: |
| 2267 | UNIMPLEMENTED(); | 2354 | UNIMPLEMENTED(); |
| 2268 | return {}; | 2355 | return {}; |
| @@ -2495,7 +2582,9 @@ private: | |||
| 2495 | const ShaderIR& ir; | 2582 | const ShaderIR& ir; |
| 2496 | const ShaderType stage; | 2583 | const ShaderType stage; |
| 2497 | const Tegra::Shader::Header header; | 2584 | const Tegra::Shader::Header header; |
| 2585 | const Registry& registry; | ||
| 2498 | const Specialization& specialization; | 2586 | const Specialization& specialization; |
| 2587 | std::unordered_map<u8, VaryingTFB> transform_feedback; | ||
| 2499 | 2588 | ||
| 2500 | const Id t_void = Name(TypeVoid(), "void"); | 2589 | const Id t_void = Name(TypeVoid(), "void"); |
| 2501 | 2590 | ||
| @@ -2584,7 +2673,7 @@ private: | |||
| 2584 | Id shared_memory{}; | 2673 | Id shared_memory{}; |
| 2585 | std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; | 2674 | std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; |
| 2586 | std::map<Attribute::Index, Id> input_attributes; | 2675 | std::map<Attribute::Index, Id> input_attributes; |
| 2587 | std::map<Attribute::Index, Id> output_attributes; | 2676 | std::unordered_map<u8, GenericVaryingDescription> output_attributes; |
| 2588 | std::map<u32, Id> constant_buffers; | 2677 | std::map<u32, Id> constant_buffers; |
| 2589 | std::map<GlobalMemoryBase, Id> global_buffers; | 2678 | std::map<GlobalMemoryBase, Id> global_buffers; |
| 2590 | std::map<u32, TexelBuffer> texel_buffers; | 2679 | std::map<u32, TexelBuffer> texel_buffers; |
| @@ -2870,8 +2959,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 2870 | } | 2959 | } |
| 2871 | 2960 | ||
| 2872 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, | 2961 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, |
| 2873 | ShaderType stage, const Specialization& specialization) { | 2962 | ShaderType stage, const VideoCommon::Shader::Registry& registry, |
| 2874 | return SPIRVDecompiler(device, ir, stage, specialization).Assemble(); | 2963 | const Specialization& specialization) { |
| 2964 | return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); | ||
| 2875 | } | 2965 | } |
| 2876 | 2966 | ||
| 2877 | } // namespace Vulkan | 2967 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index f5dc14d9e..ffea4709e 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "video_core/engines/maxwell_3d.h" | 16 | #include "video_core/engines/maxwell_3d.h" |
| 17 | #include "video_core/engines/shader_type.h" | 17 | #include "video_core/engines/shader_type.h" |
| 18 | #include "video_core/shader/registry.h" | ||
| 18 | #include "video_core/shader/shader_ir.h" | 19 | #include "video_core/shader/shader_ir.h" |
| 19 | 20 | ||
| 20 | namespace Vulkan { | 21 | namespace Vulkan { |
| @@ -91,17 +92,9 @@ struct Specialization final { | |||
| 91 | u32 shared_memory_size{}; | 92 | u32 shared_memory_size{}; |
| 92 | 93 | ||
| 93 | // Graphics specific | 94 | // Graphics specific |
| 94 | Maxwell::PrimitiveTopology primitive_topology{}; | ||
| 95 | std::optional<float> point_size{}; | 95 | std::optional<float> point_size{}; |
| 96 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; | 96 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; |
| 97 | bool ndc_minus_one_to_one{}; | 97 | bool ndc_minus_one_to_one{}; |
| 98 | |||
| 99 | // Tessellation specific | ||
| 100 | struct { | ||
| 101 | Maxwell::TessellationPrimitive primitive{}; | ||
| 102 | Maxwell::TessellationSpacing spacing{}; | ||
| 103 | bool clockwise{}; | ||
| 104 | } tessellation; | ||
| 105 | }; | 98 | }; |
| 106 | // Old gcc versions don't consider this trivially copyable. | 99 | // Old gcc versions don't consider this trivially copyable. |
| 107 | // static_assert(std::is_trivially_copyable_v<Specialization>); | 100 | // static_assert(std::is_trivially_copyable_v<Specialization>); |
| @@ -114,6 +107,8 @@ struct SPIRVShader { | |||
| 114 | ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); | 107 | ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); |
| 115 | 108 | ||
| 116 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, | 109 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, |
| 117 | Tegra::Engines::ShaderType stage, const Specialization& specialization); | 110 | Tegra::Engines::ShaderType stage, |
| 111 | const VideoCommon::Shader::Registry& registry, | ||
| 112 | const Specialization& specialization); | ||
| 118 | 113 | ||
| 119 | } // namespace Vulkan | 114 | } // namespace Vulkan |
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp index e02bcd097..8e3b46e8e 100644 --- a/src/video_core/shader/decode/bfe.cpp +++ b/src/video_core/shader/decode/bfe.cpp | |||
| @@ -17,33 +17,60 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { | |||
| 17 | const Instruction instr = {program_code[pc]}; | 17 | const Instruction instr = {program_code[pc]}; |
| 18 | const auto opcode = OpCode::Decode(instr); | 18 | const auto opcode = OpCode::Decode(instr); |
| 19 | 19 | ||
| 20 | UNIMPLEMENTED_IF(instr.bfe.negate_b); | ||
| 21 | |||
| 22 | Node op_a = GetRegister(instr.gpr8); | 20 | Node op_a = GetRegister(instr.gpr8); |
| 23 | op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false); | 21 | Node op_b = [&] { |
| 24 | 22 | switch (opcode->get().GetId()) { | |
| 25 | switch (opcode->get().GetId()) { | 23 | case OpCode::Id::BFE_R: |
| 26 | case OpCode::Id::BFE_IMM: { | 24 | return GetRegister(instr.gpr20); |
| 27 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 25 | case OpCode::Id::BFE_C: |
| 28 | "Condition codes generation in BFE is not implemented"); | 26 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |
| 27 | case OpCode::Id::BFE_IMM: | ||
| 28 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 29 | default: | ||
| 30 | UNREACHABLE(); | ||
| 31 | return Immediate(0); | ||
| 32 | } | ||
| 33 | }(); | ||
| 29 | 34 | ||
| 30 | const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue())); | 35 | UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented"); |
| 31 | const Node outer_shift_imm = | ||
| 32 | Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position)); | ||
| 33 | 36 | ||
| 34 | const Node inner_shift = | 37 | const bool is_signed = instr.bfe.is_signed; |
| 35 | Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm); | ||
| 36 | const Node outer_shift = | ||
| 37 | Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm); | ||
| 38 | 38 | ||
| 39 | SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc); | 39 | // using reverse parallel method in |
| 40 | SetRegister(bb, instr.gpr0, outer_shift); | 40 | // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel |
| 41 | break; | 41 | // note for later if possible to implement faster method. |
| 42 | } | 42 | if (instr.bfe.brev) { |
| 43 | default: | 43 | const auto swap = [&](u32 s, u32 mask) { |
| 44 | UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName()); | 44 | Node v1 = |
| 45 | SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s)); | ||
| 46 | if (mask != 0) { | ||
| 47 | v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1), | ||
| 48 | Immediate(mask)); | ||
| 49 | } | ||
| 50 | Node v2 = op_a; | ||
| 51 | if (mask != 0) { | ||
| 52 | v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2), | ||
| 53 | Immediate(mask)); | ||
| 54 | } | ||
| 55 | v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2), | ||
| 56 | Immediate(s)); | ||
| 57 | return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1), | ||
| 58 | std::move(v2)); | ||
| 59 | }; | ||
| 60 | op_a = swap(1, 0x55555555U); | ||
| 61 | op_a = swap(2, 0x33333333U); | ||
| 62 | op_a = swap(4, 0x0F0F0F0FU); | ||
| 63 | op_a = swap(8, 0x00FF00FFU); | ||
| 64 | op_a = swap(16, 0); | ||
| 45 | } | 65 | } |
| 46 | 66 | ||
| 67 | const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, | ||
| 68 | Immediate(0), Immediate(8)); | ||
| 69 | const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, | ||
| 70 | Immediate(8), Immediate(8)); | ||
| 71 | auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits); | ||
| 72 | SetRegister(bb, instr.gpr0, std::move(result)); | ||
| 73 | |||
| 47 | return pc; | 74 | return pc; |
| 48 | } | 75 | } |
| 49 | 76 | ||
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp index b3dcd291c..76c56abb5 100644 --- a/src/video_core/shader/node_helper.cpp +++ b/src/video_core/shader/node_helper.cpp | |||
| @@ -68,6 +68,8 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) | |||
| 68 | return OperationCode::UBitwiseXor; | 68 | return OperationCode::UBitwiseXor; |
| 69 | case OperationCode::IBitwiseNot: | 69 | case OperationCode::IBitwiseNot: |
| 70 | return OperationCode::UBitwiseNot; | 70 | return OperationCode::UBitwiseNot; |
| 71 | case OperationCode::IBitfieldExtract: | ||
| 72 | return OperationCode::UBitfieldExtract; | ||
| 71 | case OperationCode::IBitfieldInsert: | 73 | case OperationCode::IBitfieldInsert: |
| 72 | return OperationCode::UBitfieldInsert; | 74 | return OperationCode::UBitfieldInsert; |
| 73 | case OperationCode::IBitCount: | 75 | case OperationCode::IBitCount: |
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp new file mode 100644 index 000000000..22a933761 --- /dev/null +++ b/src/video_core/shader/transform_feedback.cpp | |||
| @@ -0,0 +1,115 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <unordered_map> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/shader/registry.h" | ||
| 13 | #include "video_core/shader/transform_feedback.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | namespace { | ||
| 18 | |||
| 19 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 20 | |||
| 21 | // TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20 | ||
| 22 | |||
| 23 | /// Attribute offsets that describe a vector | ||
| 24 | constexpr std::array VECTORS = { | ||
| 25 | 28, // gl_Position | ||
| 26 | 32, // Generic 0 | ||
| 27 | 36, // Generic 1 | ||
| 28 | 40, // Generic 2 | ||
| 29 | 44, // Generic 3 | ||
| 30 | 48, // Generic 4 | ||
| 31 | 52, // Generic 5 | ||
| 32 | 56, // Generic 6 | ||
| 33 | 60, // Generic 7 | ||
| 34 | 64, // Generic 8 | ||
| 35 | 68, // Generic 9 | ||
| 36 | 72, // Generic 10 | ||
| 37 | 76, // Generic 11 | ||
| 38 | 80, // Generic 12 | ||
| 39 | 84, // Generic 13 | ||
| 40 | 88, // Generic 14 | ||
| 41 | 92, // Generic 15 | ||
| 42 | 96, // Generic 16 | ||
| 43 | 100, // Generic 17 | ||
| 44 | 104, // Generic 18 | ||
| 45 | 108, // Generic 19 | ||
| 46 | 112, // Generic 20 | ||
| 47 | 116, // Generic 21 | ||
| 48 | 120, // Generic 22 | ||
| 49 | 124, // Generic 23 | ||
| 50 | 128, // Generic 24 | ||
| 51 | 132, // Generic 25 | ||
| 52 | 136, // Generic 26 | ||
| 53 | 140, // Generic 27 | ||
| 54 | 144, // Generic 28 | ||
| 55 | 148, // Generic 29 | ||
| 56 | 152, // Generic 30 | ||
| 57 | 156, // Generic 31 | ||
| 58 | 160, // gl_FrontColor | ||
| 59 | 164, // gl_FrontSecondaryColor | ||
| 60 | 160, // gl_BackColor | ||
| 61 | 164, // gl_BackSecondaryColor | ||
| 62 | 192, // gl_TexCoord[0] | ||
| 63 | 196, // gl_TexCoord[1] | ||
| 64 | 200, // gl_TexCoord[2] | ||
| 65 | 204, // gl_TexCoord[3] | ||
| 66 | 208, // gl_TexCoord[4] | ||
| 67 | 212, // gl_TexCoord[5] | ||
| 68 | 216, // gl_TexCoord[6] | ||
| 69 | 220, // gl_TexCoord[7] | ||
| 70 | }; | ||
| 71 | } // namespace | ||
| 72 | |||
| 73 | std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) { | ||
| 74 | |||
| 75 | std::unordered_map<u8, VaryingTFB> tfb; | ||
| 76 | |||
| 77 | for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { | ||
| 78 | const auto& locations = info.tfb_varying_locs[buffer]; | ||
| 79 | const auto& layout = info.tfb_layouts[buffer]; | ||
| 80 | const std::size_t varying_count = layout.varying_count; | ||
| 81 | |||
| 82 | std::size_t highest = 0; | ||
| 83 | |||
| 84 | for (std::size_t offset = 0; offset < varying_count; ++offset) { | ||
| 85 | const std::size_t base_offset = offset; | ||
| 86 | const u8 location = locations[offset]; | ||
| 87 | |||
| 88 | VaryingTFB varying; | ||
| 89 | varying.buffer = layout.stream; | ||
| 90 | varying.stride = layout.stride; | ||
| 91 | varying.offset = offset * sizeof(u32); | ||
| 92 | varying.components = 1; | ||
| 93 | |||
| 94 | if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) { | ||
| 95 | UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); | ||
| 96 | |||
| 97 | const u8 base_index = location / 4; | ||
| 98 | while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { | ||
| 99 | ++offset; | ||
| 100 | ++varying.components; | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second; | ||
| 105 | UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored"); | ||
| 106 | |||
| 107 | highest = std::max(highest, (base_offset + varying.components) * sizeof(u32)); | ||
| 108 | } | ||
| 109 | |||
| 110 | UNIMPLEMENTED_IF(highest != layout.stride); | ||
| 111 | } | ||
| 112 | return tfb; | ||
| 113 | } | ||
| 114 | |||
| 115 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h new file mode 100644 index 000000000..77d05f64c --- /dev/null +++ b/src/video_core/shader/transform_feedback.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <unordered_map> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/shader/registry.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | struct VaryingTFB { | ||
| 15 | std::size_t buffer; | ||
| 16 | std::size_t stride; | ||
| 17 | std::size_t offset; | ||
| 18 | std::size_t components; | ||
| 19 | }; | ||
| 20 | |||
| 21 | std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info); | ||
| 22 | |||
| 23 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 9707c353d..cc7181229 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -111,6 +111,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) | |||
| 111 | return PixelFormat::RGBA16F; | 111 | return PixelFormat::RGBA16F; |
| 112 | case Tegra::RenderTargetFormat::RGBA16_UNORM: | 112 | case Tegra::RenderTargetFormat::RGBA16_UNORM: |
| 113 | return PixelFormat::RGBA16U; | 113 | return PixelFormat::RGBA16U; |
| 114 | case Tegra::RenderTargetFormat::RGBA16_SNORM: | ||
| 115 | return PixelFormat::RGBA16S; | ||
| 114 | case Tegra::RenderTargetFormat::RGBA16_UINT: | 116 | case Tegra::RenderTargetFormat::RGBA16_UINT: |
| 115 | return PixelFormat::RGBA16UI; | 117 | return PixelFormat::RGBA16UI; |
| 116 | case Tegra::RenderTargetFormat::RGBA32_FLOAT: | 118 | case Tegra::RenderTargetFormat::RGBA32_FLOAT: |
diff --git a/src/video_core/surface.h b/src/video_core/surface.h index d88109e5a..ae8817465 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h | |||
| @@ -25,82 +25,83 @@ enum class PixelFormat { | |||
| 25 | R8UI = 7, | 25 | R8UI = 7, |
| 26 | RGBA16F = 8, | 26 | RGBA16F = 8, |
| 27 | RGBA16U = 9, | 27 | RGBA16U = 9, |
| 28 | RGBA16UI = 10, | 28 | RGBA16S = 10, |
| 29 | R11FG11FB10F = 11, | 29 | RGBA16UI = 11, |
| 30 | RGBA32UI = 12, | 30 | R11FG11FB10F = 12, |
| 31 | DXT1 = 13, | 31 | RGBA32UI = 13, |
| 32 | DXT23 = 14, | 32 | DXT1 = 14, |
| 33 | DXT45 = 15, | 33 | DXT23 = 15, |
| 34 | DXN1 = 16, // This is also known as BC4 | 34 | DXT45 = 16, |
| 35 | DXN2UNORM = 17, | 35 | DXN1 = 17, // This is also known as BC4 |
| 36 | DXN2SNORM = 18, | 36 | DXN2UNORM = 18, |
| 37 | BC7U = 19, | 37 | DXN2SNORM = 19, |
| 38 | BC6H_UF16 = 20, | 38 | BC7U = 20, |
| 39 | BC6H_SF16 = 21, | 39 | BC6H_UF16 = 21, |
| 40 | ASTC_2D_4X4 = 22, | 40 | BC6H_SF16 = 22, |
| 41 | BGRA8 = 23, | 41 | ASTC_2D_4X4 = 23, |
| 42 | RGBA32F = 24, | 42 | BGRA8 = 24, |
| 43 | RG32F = 25, | 43 | RGBA32F = 25, |
| 44 | R32F = 26, | 44 | RG32F = 26, |
| 45 | R16F = 27, | 45 | R32F = 27, |
| 46 | R16U = 28, | 46 | R16F = 28, |
| 47 | R16S = 29, | 47 | R16U = 29, |
| 48 | R16UI = 30, | 48 | R16S = 30, |
| 49 | R16I = 31, | 49 | R16UI = 31, |
| 50 | RG16 = 32, | 50 | R16I = 32, |
| 51 | RG16F = 33, | 51 | RG16 = 33, |
| 52 | RG16UI = 34, | 52 | RG16F = 34, |
| 53 | RG16I = 35, | 53 | RG16UI = 35, |
| 54 | RG16S = 36, | 54 | RG16I = 36, |
| 55 | RGB32F = 37, | 55 | RG16S = 37, |
| 56 | RGBA8_SRGB = 38, | 56 | RGB32F = 38, |
| 57 | RG8U = 39, | 57 | RGBA8_SRGB = 39, |
| 58 | RG8S = 40, | 58 | RG8U = 40, |
| 59 | RG32UI = 41, | 59 | RG8S = 41, |
| 60 | RGBX16F = 42, | 60 | RG32UI = 42, |
| 61 | R32UI = 43, | 61 | RGBX16F = 43, |
| 62 | R32I = 44, | 62 | R32UI = 44, |
| 63 | ASTC_2D_8X8 = 45, | 63 | R32I = 45, |
| 64 | ASTC_2D_8X5 = 46, | 64 | ASTC_2D_8X8 = 46, |
| 65 | ASTC_2D_5X4 = 47, | 65 | ASTC_2D_8X5 = 47, |
| 66 | BGRA8_SRGB = 48, | 66 | ASTC_2D_5X4 = 48, |
| 67 | DXT1_SRGB = 49, | 67 | BGRA8_SRGB = 49, |
| 68 | DXT23_SRGB = 50, | 68 | DXT1_SRGB = 50, |
| 69 | DXT45_SRGB = 51, | 69 | DXT23_SRGB = 51, |
| 70 | BC7U_SRGB = 52, | 70 | DXT45_SRGB = 52, |
| 71 | R4G4B4A4U = 53, | 71 | BC7U_SRGB = 53, |
| 72 | ASTC_2D_4X4_SRGB = 54, | 72 | R4G4B4A4U = 54, |
| 73 | ASTC_2D_8X8_SRGB = 55, | 73 | ASTC_2D_4X4_SRGB = 55, |
| 74 | ASTC_2D_8X5_SRGB = 56, | 74 | ASTC_2D_8X8_SRGB = 56, |
| 75 | ASTC_2D_5X4_SRGB = 57, | 75 | ASTC_2D_8X5_SRGB = 57, |
| 76 | ASTC_2D_5X5 = 58, | 76 | ASTC_2D_5X4_SRGB = 58, |
| 77 | ASTC_2D_5X5_SRGB = 59, | 77 | ASTC_2D_5X5 = 59, |
| 78 | ASTC_2D_10X8 = 60, | 78 | ASTC_2D_5X5_SRGB = 60, |
| 79 | ASTC_2D_10X8_SRGB = 61, | 79 | ASTC_2D_10X8 = 61, |
| 80 | ASTC_2D_6X6 = 62, | 80 | ASTC_2D_10X8_SRGB = 62, |
| 81 | ASTC_2D_6X6_SRGB = 63, | 81 | ASTC_2D_6X6 = 63, |
| 82 | ASTC_2D_10X10 = 64, | 82 | ASTC_2D_6X6_SRGB = 64, |
| 83 | ASTC_2D_10X10_SRGB = 65, | 83 | ASTC_2D_10X10 = 65, |
| 84 | ASTC_2D_12X12 = 66, | 84 | ASTC_2D_10X10_SRGB = 66, |
| 85 | ASTC_2D_12X12_SRGB = 67, | 85 | ASTC_2D_12X12 = 67, |
| 86 | ASTC_2D_8X6 = 68, | 86 | ASTC_2D_12X12_SRGB = 68, |
| 87 | ASTC_2D_8X6_SRGB = 69, | 87 | ASTC_2D_8X6 = 69, |
| 88 | ASTC_2D_6X5 = 70, | 88 | ASTC_2D_8X6_SRGB = 70, |
| 89 | ASTC_2D_6X5_SRGB = 71, | 89 | ASTC_2D_6X5 = 71, |
| 90 | E5B9G9R9F = 72, | 90 | ASTC_2D_6X5_SRGB = 72, |
| 91 | E5B9G9R9F = 73, | ||
| 91 | 92 | ||
| 92 | MaxColorFormat, | 93 | MaxColorFormat, |
| 93 | 94 | ||
| 94 | // Depth formats | 95 | // Depth formats |
| 95 | Z32F = 73, | 96 | Z32F = 74, |
| 96 | Z16 = 74, | 97 | Z16 = 75, |
| 97 | 98 | ||
| 98 | MaxDepthFormat, | 99 | MaxDepthFormat, |
| 99 | 100 | ||
| 100 | // DepthStencil formats | 101 | // DepthStencil formats |
| 101 | Z24S8 = 75, | 102 | Z24S8 = 76, |
| 102 | S8Z24 = 76, | 103 | S8Z24 = 77, |
| 103 | Z32FS8 = 77, | 104 | Z32FS8 = 78, |
| 104 | 105 | ||
| 105 | MaxDepthStencilFormat, | 106 | MaxDepthStencilFormat, |
| 106 | 107 | ||
| @@ -138,6 +139,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ | |||
| 138 | 0, // R8UI | 139 | 0, // R8UI |
| 139 | 0, // RGBA16F | 140 | 0, // RGBA16F |
| 140 | 0, // RGBA16U | 141 | 0, // RGBA16U |
| 142 | 0, // RGBA16S | ||
| 141 | 0, // RGBA16UI | 143 | 0, // RGBA16UI |
| 142 | 0, // R11FG11FB10F | 144 | 0, // R11FG11FB10F |
| 143 | 0, // RGBA32UI | 145 | 0, // RGBA32UI |
| @@ -235,6 +237,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ | |||
| 235 | 1, // R8UI | 237 | 1, // R8UI |
| 236 | 1, // RGBA16F | 238 | 1, // RGBA16F |
| 237 | 1, // RGBA16U | 239 | 1, // RGBA16U |
| 240 | 1, // RGBA16S | ||
| 238 | 1, // RGBA16UI | 241 | 1, // RGBA16UI |
| 239 | 1, // R11FG11FB10F | 242 | 1, // R11FG11FB10F |
| 240 | 1, // RGBA32UI | 243 | 1, // RGBA32UI |
| @@ -324,6 +327,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ | |||
| 324 | 1, // R8UI | 327 | 1, // R8UI |
| 325 | 1, // RGBA16F | 328 | 1, // RGBA16F |
| 326 | 1, // RGBA16U | 329 | 1, // RGBA16U |
| 330 | 1, // RGBA16S | ||
| 327 | 1, // RGBA16UI | 331 | 1, // RGBA16UI |
| 328 | 1, // R11FG11FB10F | 332 | 1, // R11FG11FB10F |
| 329 | 1, // RGBA32UI | 333 | 1, // RGBA32UI |
| @@ -413,6 +417,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ | |||
| 413 | 8, // R8UI | 417 | 8, // R8UI |
| 414 | 64, // RGBA16F | 418 | 64, // RGBA16F |
| 415 | 64, // RGBA16U | 419 | 64, // RGBA16U |
| 420 | 64, // RGBA16S | ||
| 416 | 64, // RGBA16UI | 421 | 64, // RGBA16UI |
| 417 | 32, // R11FG11FB10F | 422 | 32, // R11FG11FB10F |
| 418 | 128, // RGBA32UI | 423 | 128, // RGBA32UI |
| @@ -517,6 +522,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table | |||
| 517 | SurfaceCompression::None, // R8UI | 522 | SurfaceCompression::None, // R8UI |
| 518 | SurfaceCompression::None, // RGBA16F | 523 | SurfaceCompression::None, // RGBA16F |
| 519 | SurfaceCompression::None, // RGBA16U | 524 | SurfaceCompression::None, // RGBA16U |
| 525 | SurfaceCompression::None, // RGBA16S | ||
| 520 | SurfaceCompression::None, // RGBA16UI | 526 | SurfaceCompression::None, // RGBA16UI |
| 521 | SurfaceCompression::None, // R11FG11FB10F | 527 | SurfaceCompression::None, // R11FG11FB10F |
| 522 | SurfaceCompression::None, // RGBA32UI | 528 | SurfaceCompression::None, // RGBA32UI |
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index cc3ad8417..e151c26c4 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp | |||
| @@ -41,7 +41,7 @@ struct Table { | |||
| 41 | ComponentType alpha_component; | 41 | ComponentType alpha_component; |
| 42 | bool is_srgb; | 42 | bool is_srgb; |
| 43 | }; | 43 | }; |
| 44 | constexpr std::array<Table, 75> DefinitionTable = {{ | 44 | constexpr std::array<Table, 76> DefinitionTable = {{ |
| 45 | {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, | 45 | {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, |
| 46 | {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, | 46 | {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, |
| 47 | {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, | 47 | {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, |
| @@ -61,6 +61,7 @@ constexpr std::array<Table, 75> DefinitionTable = {{ | |||
| 61 | {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U}, | 61 | {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U}, |
| 62 | {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S}, | 62 | {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S}, |
| 63 | 63 | ||
| 64 | {TextureFormat::R16_G16_B16_A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RGBA16S}, | ||
| 64 | {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U}, | 65 | {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U}, |
| 65 | {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F}, | 66 | {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F}, |
| 66 | {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI}, | 67 | {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI}, |
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index f00839313..9931c5ef7 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp | |||
| @@ -113,8 +113,10 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta | |||
| 113 | params.height = tic.Height(); | 113 | params.height = tic.Height(); |
| 114 | params.depth = tic.Depth(); | 114 | params.depth = tic.Depth(); |
| 115 | params.pitch = params.is_tiled ? 0 : tic.Pitch(); | 115 | params.pitch = params.is_tiled ? 0 : tic.Pitch(); |
| 116 | if (params.target == SurfaceTarget::TextureCubemap || | 116 | if (params.target == SurfaceTarget::Texture2D && params.depth > 1) { |
| 117 | params.target == SurfaceTarget::TextureCubeArray) { | 117 | params.depth = 1; |
| 118 | } else if (params.target == SurfaceTarget::TextureCubemap || | ||
| 119 | params.target == SurfaceTarget::TextureCubeArray) { | ||
| 118 | params.depth *= 6; | 120 | params.depth *= 6; |
| 119 | } | 121 | } |
| 120 | params.num_levels = tic.max_mip_level + 1; | 122 | params.num_levels = tic.max_mip_level + 1; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 51373b687..6cdbe63d0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -104,6 +104,11 @@ public: | |||
| 104 | if (!cache_addr) { | 104 | if (!cache_addr) { |
| 105 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | 105 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); |
| 106 | } | 106 | } |
| 107 | |||
| 108 | if (!IsTypeCompatible(tic.texture_type, entry)) { | ||
| 109 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||
| 110 | } | ||
| 111 | |||
| 107 | const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; | 112 | const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; |
| 108 | const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); | 113 | const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); |
| 109 | if (guard_samplers) { | 114 | if (guard_samplers) { |
| @@ -914,13 +919,15 @@ private: | |||
| 914 | params.width = 1; | 919 | params.width = 1; |
| 915 | params.height = 1; | 920 | params.height = 1; |
| 916 | params.depth = 1; | 921 | params.depth = 1; |
| 922 | if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) { | ||
| 923 | params.depth = 6; | ||
| 924 | } | ||
| 917 | params.pitch = 4; | 925 | params.pitch = 4; |
| 918 | params.num_levels = 1; | 926 | params.num_levels = 1; |
| 919 | params.emulated_levels = 1; | 927 | params.emulated_levels = 1; |
| 920 | params.pixel_format = VideoCore::Surface::PixelFormat::RGBA16F; | 928 | params.pixel_format = VideoCore::Surface::PixelFormat::R8U; |
| 921 | params.type = VideoCore::Surface::SurfaceType::ColorTexture; | 929 | params.type = VideoCore::Surface::SurfaceType::ColorTexture; |
| 922 | auto surface = CreateSurface(0ULL, params); | 930 | auto surface = CreateSurface(0ULL, params); |
| 923 | invalid_memory.clear(); | ||
| 924 | invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); | 931 | invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); |
| 925 | surface->UploadTexture(invalid_memory); | 932 | surface->UploadTexture(invalid_memory); |
| 926 | surface->MarkAsModified(false, Tick()); | 933 | surface->MarkAsModified(false, Tick()); |
| @@ -1082,6 +1089,36 @@ private: | |||
| 1082 | return siblings_table[static_cast<std::size_t>(format)]; | 1089 | return siblings_table[static_cast<std::size_t>(format)]; |
| 1083 | } | 1090 | } |
| 1084 | 1091 | ||
| 1092 | /// Returns true the shader sampler entry is compatible with the TIC texture type. | ||
| 1093 | static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type, | ||
| 1094 | const VideoCommon::Shader::Sampler& entry) { | ||
| 1095 | const auto shader_type = entry.GetType(); | ||
| 1096 | switch (tic_type) { | ||
| 1097 | case Tegra::Texture::TextureType::Texture1D: | ||
| 1098 | case Tegra::Texture::TextureType::Texture1DArray: | ||
| 1099 | return shader_type == Tegra::Shader::TextureType::Texture1D; | ||
| 1100 | case Tegra::Texture::TextureType::Texture1DBuffer: | ||
| 1101 | // TODO(Rodrigo): Assume as valid for now | ||
| 1102 | return true; | ||
| 1103 | case Tegra::Texture::TextureType::Texture2D: | ||
| 1104 | case Tegra::Texture::TextureType::Texture2DNoMipmap: | ||
| 1105 | return shader_type == Tegra::Shader::TextureType::Texture2D; | ||
| 1106 | case Tegra::Texture::TextureType::Texture2DArray: | ||
| 1107 | return shader_type == Tegra::Shader::TextureType::Texture2D || | ||
| 1108 | shader_type == Tegra::Shader::TextureType::TextureCube; | ||
| 1109 | case Tegra::Texture::TextureType::Texture3D: | ||
| 1110 | return shader_type == Tegra::Shader::TextureType::Texture3D; | ||
| 1111 | case Tegra::Texture::TextureType::TextureCubeArray: | ||
| 1112 | case Tegra::Texture::TextureType::TextureCubemap: | ||
| 1113 | if (shader_type == Tegra::Shader::TextureType::TextureCube) { | ||
| 1114 | return true; | ||
| 1115 | } | ||
| 1116 | return shader_type == Tegra::Shader::TextureType::Texture2D && entry.IsArray(); | ||
| 1117 | } | ||
| 1118 | UNREACHABLE(); | ||
| 1119 | return true; | ||
| 1120 | } | ||
| 1121 | |||
| 1085 | struct FramebufferTargetInfo { | 1122 | struct FramebufferTargetInfo { |
| 1086 | TSurface target; | 1123 | TSurface target; |
| 1087 | TView view; | 1124 | TView view; |