diff options
Diffstat (limited to 'src')
43 files changed, 1605 insertions, 1534 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 9afc6105d..fbebed715 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -38,8 +38,6 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 38 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" | 38 | "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h" |
| 39 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" | 39 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp" |
| 40 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" | 40 | "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h" |
| 41 | "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp" | ||
| 42 | "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h" | ||
| 43 | "${VIDEO_CORE}/shader/decode/arithmetic.cpp" | 41 | "${VIDEO_CORE}/shader/decode/arithmetic.cpp" |
| 44 | "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" | 42 | "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp" |
| 45 | "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" | 43 | "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp" |
| @@ -72,8 +70,6 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 72 | "${VIDEO_CORE}/shader/ast.h" | 70 | "${VIDEO_CORE}/shader/ast.h" |
| 73 | "${VIDEO_CORE}/shader/compiler_settings.cpp" | 71 | "${VIDEO_CORE}/shader/compiler_settings.cpp" |
| 74 | "${VIDEO_CORE}/shader/compiler_settings.h" | 72 | "${VIDEO_CORE}/shader/compiler_settings.h" |
| 75 | "${VIDEO_CORE}/shader/const_buffer_locker.cpp" | ||
| 76 | "${VIDEO_CORE}/shader/const_buffer_locker.h" | ||
| 77 | "${VIDEO_CORE}/shader/control_flow.cpp" | 73 | "${VIDEO_CORE}/shader/control_flow.cpp" |
| 78 | "${VIDEO_CORE}/shader/control_flow.h" | 74 | "${VIDEO_CORE}/shader/control_flow.h" |
| 79 | "${VIDEO_CORE}/shader/decode.cpp" | 75 | "${VIDEO_CORE}/shader/decode.cpp" |
| @@ -82,9 +78,13 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 82 | "${VIDEO_CORE}/shader/node.h" | 78 | "${VIDEO_CORE}/shader/node.h" |
| 83 | "${VIDEO_CORE}/shader/node_helper.cpp" | 79 | "${VIDEO_CORE}/shader/node_helper.cpp" |
| 84 | "${VIDEO_CORE}/shader/node_helper.h" | 80 | "${VIDEO_CORE}/shader/node_helper.h" |
| 81 | "${VIDEO_CORE}/shader/registry.cpp" | ||
| 82 | "${VIDEO_CORE}/shader/registry.h" | ||
| 85 | "${VIDEO_CORE}/shader/shader_ir.cpp" | 83 | "${VIDEO_CORE}/shader/shader_ir.cpp" |
| 86 | "${VIDEO_CORE}/shader/shader_ir.h" | 84 | "${VIDEO_CORE}/shader/shader_ir.h" |
| 87 | "${VIDEO_CORE}/shader/track.cpp" | 85 | "${VIDEO_CORE}/shader/track.cpp" |
| 86 | "${VIDEO_CORE}/shader/transform_feedback.cpp" | ||
| 87 | "${VIDEO_CORE}/shader/transform_feedback.h" | ||
| 88 | # and also check that the scm_rev files haven't changed | 88 | # and also check that the scm_rev files haven't changed |
| 89 | "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" | 89 | "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" |
| 90 | "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" | 90 | "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h" |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 14f3b4569..91df062d7 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -65,8 +65,6 @@ add_library(video_core STATIC | |||
| 65 | renderer_opengl/gl_shader_decompiler.h | 65 | renderer_opengl/gl_shader_decompiler.h |
| 66 | renderer_opengl/gl_shader_disk_cache.cpp | 66 | renderer_opengl/gl_shader_disk_cache.cpp |
| 67 | renderer_opengl/gl_shader_disk_cache.h | 67 | renderer_opengl/gl_shader_disk_cache.h |
| 68 | renderer_opengl/gl_shader_gen.cpp | ||
| 69 | renderer_opengl/gl_shader_gen.h | ||
| 70 | renderer_opengl/gl_shader_manager.cpp | 68 | renderer_opengl/gl_shader_manager.cpp |
| 71 | renderer_opengl/gl_shader_manager.h | 69 | renderer_opengl/gl_shader_manager.h |
| 72 | renderer_opengl/gl_shader_util.cpp | 70 | renderer_opengl/gl_shader_util.cpp |
| @@ -118,8 +116,6 @@ add_library(video_core STATIC | |||
| 118 | shader/ast.h | 116 | shader/ast.h |
| 119 | shader/compiler_settings.cpp | 117 | shader/compiler_settings.cpp |
| 120 | shader/compiler_settings.h | 118 | shader/compiler_settings.h |
| 121 | shader/const_buffer_locker.cpp | ||
| 122 | shader/const_buffer_locker.h | ||
| 123 | shader/control_flow.cpp | 119 | shader/control_flow.cpp |
| 124 | shader/control_flow.h | 120 | shader/control_flow.h |
| 125 | shader/decode.cpp | 121 | shader/decode.cpp |
| @@ -128,9 +124,13 @@ add_library(video_core STATIC | |||
| 128 | shader/node_helper.cpp | 124 | shader/node_helper.cpp |
| 129 | shader/node_helper.h | 125 | shader/node_helper.h |
| 130 | shader/node.h | 126 | shader/node.h |
| 127 | shader/registry.cpp | ||
| 128 | shader/registry.h | ||
| 131 | shader/shader_ir.cpp | 129 | shader/shader_ir.cpp |
| 132 | shader/shader_ir.h | 130 | shader/shader_ir.h |
| 133 | shader/track.cpp | 131 | shader/track.cpp |
| 132 | shader/transform_feedback.cpp | ||
| 133 | shader/transform_feedback.h | ||
| 134 | surface.cpp | 134 | surface.cpp |
| 135 | surface.h | 135 | surface.h |
| 136 | texture_cache/format_lookup_table.cpp | 136 | texture_cache/format_lookup_table.cpp |
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h index d56a47710..724ee0fd6 100644 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ b/src/video_core/engines/const_buffer_engine_interface.h | |||
| @@ -16,11 +16,12 @@ namespace Tegra::Engines { | |||
| 16 | 16 | ||
| 17 | struct SamplerDescriptor { | 17 | struct SamplerDescriptor { |
| 18 | union { | 18 | union { |
| 19 | BitField<0, 20, Tegra::Shader::TextureType> texture_type; | 19 | u32 raw = 0; |
| 20 | BitField<20, 1, u32> is_array; | 20 | BitField<0, 2, Tegra::Shader::TextureType> texture_type; |
| 21 | BitField<21, 1, u32> is_buffer; | 21 | BitField<2, 3, Tegra::Texture::ComponentType> component_type; |
| 22 | BitField<22, 1, u32> is_shadow; | 22 | BitField<5, 1, u32> is_array; |
| 23 | u32 raw{}; | 23 | BitField<6, 1, u32> is_buffer; |
| 24 | BitField<7, 1, u32> is_shadow; | ||
| 24 | }; | 25 | }; |
| 25 | 26 | ||
| 26 | bool operator==(const SamplerDescriptor& rhs) const noexcept { | 27 | bool operator==(const SamplerDescriptor& rhs) const noexcept { |
| @@ -31,68 +32,48 @@ struct SamplerDescriptor { | |||
| 31 | return !operator==(rhs); | 32 | return !operator==(rhs); |
| 32 | } | 33 | } |
| 33 | 34 | ||
| 34 | static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) { | 35 | static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) { |
| 36 | using Tegra::Shader::TextureType; | ||
| 35 | SamplerDescriptor result; | 37 | SamplerDescriptor result; |
| 36 | switch (tic_texture_type) { | 38 | |
| 39 | // This is going to be used to determine the shading language type. | ||
| 40 | // Because of that we don't care about all component types on color textures. | ||
| 41 | result.component_type.Assign(tic.r_type.Value()); | ||
| 42 | |||
| 43 | switch (tic.texture_type.Value()) { | ||
| 37 | case Tegra::Texture::TextureType::Texture1D: | 44 | case Tegra::Texture::TextureType::Texture1D: |
| 38 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); | 45 | result.texture_type.Assign(TextureType::Texture1D); |
| 39 | result.is_array.Assign(0); | ||
| 40 | result.is_buffer.Assign(0); | ||
| 41 | result.is_shadow.Assign(0); | ||
| 42 | return result; | 46 | return result; |
| 43 | case Tegra::Texture::TextureType::Texture2D: | 47 | case Tegra::Texture::TextureType::Texture2D: |
| 44 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | 48 | result.texture_type.Assign(TextureType::Texture2D); |
| 45 | result.is_array.Assign(0); | ||
| 46 | result.is_buffer.Assign(0); | ||
| 47 | result.is_shadow.Assign(0); | ||
| 48 | return result; | 49 | return result; |
| 49 | case Tegra::Texture::TextureType::Texture3D: | 50 | case Tegra::Texture::TextureType::Texture3D: |
| 50 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D); | 51 | result.texture_type.Assign(TextureType::Texture3D); |
| 51 | result.is_array.Assign(0); | ||
| 52 | result.is_buffer.Assign(0); | ||
| 53 | result.is_shadow.Assign(0); | ||
| 54 | return result; | 52 | return result; |
| 55 | case Tegra::Texture::TextureType::TextureCubemap: | 53 | case Tegra::Texture::TextureType::TextureCubemap: |
| 56 | result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); | 54 | result.texture_type.Assign(TextureType::TextureCube); |
| 57 | result.is_array.Assign(0); | ||
| 58 | result.is_buffer.Assign(0); | ||
| 59 | result.is_shadow.Assign(0); | ||
| 60 | return result; | 55 | return result; |
| 61 | case Tegra::Texture::TextureType::Texture1DArray: | 56 | case Tegra::Texture::TextureType::Texture1DArray: |
| 62 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); | 57 | result.texture_type.Assign(TextureType::Texture1D); |
| 63 | result.is_array.Assign(1); | 58 | result.is_array.Assign(1); |
| 64 | result.is_buffer.Assign(0); | ||
| 65 | result.is_shadow.Assign(0); | ||
| 66 | return result; | 59 | return result; |
| 67 | case Tegra::Texture::TextureType::Texture2DArray: | 60 | case Tegra::Texture::TextureType::Texture2DArray: |
| 68 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | 61 | result.texture_type.Assign(TextureType::Texture2D); |
| 69 | result.is_array.Assign(1); | 62 | result.is_array.Assign(1); |
| 70 | result.is_buffer.Assign(0); | ||
| 71 | result.is_shadow.Assign(0); | ||
| 72 | return result; | 63 | return result; |
| 73 | case Tegra::Texture::TextureType::Texture1DBuffer: | 64 | case Tegra::Texture::TextureType::Texture1DBuffer: |
| 74 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); | 65 | result.texture_type.Assign(TextureType::Texture1D); |
| 75 | result.is_array.Assign(0); | ||
| 76 | result.is_buffer.Assign(1); | 66 | result.is_buffer.Assign(1); |
| 77 | result.is_shadow.Assign(0); | ||
| 78 | return result; | 67 | return result; |
| 79 | case Tegra::Texture::TextureType::Texture2DNoMipmap: | 68 | case Tegra::Texture::TextureType::Texture2DNoMipmap: |
| 80 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | 69 | result.texture_type.Assign(TextureType::Texture2D); |
| 81 | result.is_array.Assign(0); | ||
| 82 | result.is_buffer.Assign(0); | ||
| 83 | result.is_shadow.Assign(0); | ||
| 84 | return result; | 70 | return result; |
| 85 | case Tegra::Texture::TextureType::TextureCubeArray: | 71 | case Tegra::Texture::TextureType::TextureCubeArray: |
| 86 | result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); | 72 | result.texture_type.Assign(TextureType::TextureCube); |
| 87 | result.is_array.Assign(1); | 73 | result.is_array.Assign(1); |
| 88 | result.is_buffer.Assign(0); | ||
| 89 | result.is_shadow.Assign(0); | ||
| 90 | return result; | 74 | return result; |
| 91 | default: | 75 | default: |
| 92 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | 76 | result.texture_type.Assign(TextureType::Texture2D); |
| 93 | result.is_array.Assign(0); | ||
| 94 | result.is_buffer.Assign(0); | ||
| 95 | result.is_shadow.Assign(0); | ||
| 96 | return result; | 77 | return result; |
| 97 | } | 78 | } |
| 98 | } | 79 | } |
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index ae52afa79..1ecd65925 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -89,7 +89,7 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con | |||
| 89 | 89 | ||
| 90 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; | 90 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; |
| 91 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); | 91 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); |
| 92 | SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); | 92 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); |
| 93 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); | 93 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); |
| 94 | return result; | 94 | return result; |
| 95 | } | 95 | } |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 89050361e..ce536e29b 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -638,7 +638,7 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b | |||
| 638 | 638 | ||
| 639 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; | 639 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; |
| 640 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); | 640 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); |
| 641 | SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); | 641 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); |
| 642 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); | 642 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); |
| 643 | return result; | 643 | return result; |
| 644 | } | 644 | } |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index ed7fc8fdd..8a9e9992e 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -67,6 +67,7 @@ public: | |||
| 67 | static constexpr std::size_t NumVaryings = 31; | 67 | static constexpr std::size_t NumVaryings = 31; |
| 68 | static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number | 68 | static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number |
| 69 | static constexpr std::size_t NumClipDistances = 8; | 69 | static constexpr std::size_t NumClipDistances = 8; |
| 70 | static constexpr std::size_t NumTransformFeedbackBuffers = 4; | ||
| 70 | static constexpr std::size_t MaxShaderProgram = 6; | 71 | static constexpr std::size_t MaxShaderProgram = 6; |
| 71 | static constexpr std::size_t MaxShaderStage = 5; | 72 | static constexpr std::size_t MaxShaderStage = 5; |
| 72 | // Maximum number of const buffers per shader stage. | 73 | // Maximum number of const buffers per shader stage. |
| @@ -627,6 +628,29 @@ public: | |||
| 627 | float depth_range_far; | 628 | float depth_range_far; |
| 628 | }; | 629 | }; |
| 629 | 630 | ||
| 631 | struct TransformFeedbackBinding { | ||
| 632 | u32 buffer_enable; | ||
| 633 | u32 address_high; | ||
| 634 | u32 address_low; | ||
| 635 | s32 buffer_size; | ||
| 636 | s32 buffer_offset; | ||
| 637 | INSERT_UNION_PADDING_WORDS(3); | ||
| 638 | |||
| 639 | GPUVAddr Address() const { | ||
| 640 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||
| 641 | address_low); | ||
| 642 | } | ||
| 643 | }; | ||
| 644 | static_assert(sizeof(TransformFeedbackBinding) == 32); | ||
| 645 | |||
| 646 | struct TransformFeedbackLayout { | ||
| 647 | u32 stream; | ||
| 648 | u32 varying_count; | ||
| 649 | u32 stride; | ||
| 650 | INSERT_UNION_PADDING_WORDS(1); | ||
| 651 | }; | ||
| 652 | static_assert(sizeof(TransformFeedbackLayout) == 16); | ||
| 653 | |||
| 630 | bool IsShaderConfigEnabled(std::size_t index) const { | 654 | bool IsShaderConfigEnabled(std::size_t index) const { |
| 631 | // The VertexB is always enabled. | 655 | // The VertexB is always enabled. |
| 632 | if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) { | 656 | if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) { |
| @@ -635,6 +659,10 @@ public: | |||
| 635 | return shader_config[index].enable != 0; | 659 | return shader_config[index].enable != 0; |
| 636 | } | 660 | } |
| 637 | 661 | ||
| 662 | bool IsShaderConfigEnabled(Regs::ShaderProgram type) const { | ||
| 663 | return IsShaderConfigEnabled(static_cast<std::size_t>(type)); | ||
| 664 | } | ||
| 665 | |||
| 638 | union { | 666 | union { |
| 639 | struct { | 667 | struct { |
| 640 | INSERT_UNION_PADDING_WORDS(0x45); | 668 | INSERT_UNION_PADDING_WORDS(0x45); |
| @@ -683,7 +711,13 @@ public: | |||
| 683 | 711 | ||
| 684 | u32 rasterize_enable; | 712 | u32 rasterize_enable; |
| 685 | 713 | ||
| 686 | INSERT_UNION_PADDING_WORDS(0xF1); | 714 | std::array<TransformFeedbackBinding, NumTransformFeedbackBuffers> tfb_bindings; |
| 715 | |||
| 716 | INSERT_UNION_PADDING_WORDS(0xC0); | ||
| 717 | |||
| 718 | std::array<TransformFeedbackLayout, NumTransformFeedbackBuffers> tfb_layouts; | ||
| 719 | |||
| 720 | INSERT_UNION_PADDING_WORDS(0x1); | ||
| 687 | 721 | ||
| 688 | u32 tfb_enabled; | 722 | u32 tfb_enabled; |
| 689 | 723 | ||
| @@ -1202,7 +1236,11 @@ public: | |||
| 1202 | 1236 | ||
| 1203 | u32 tex_cb_index; | 1237 | u32 tex_cb_index; |
| 1204 | 1238 | ||
| 1205 | INSERT_UNION_PADDING_WORDS(0x395); | 1239 | INSERT_UNION_PADDING_WORDS(0x7D); |
| 1240 | |||
| 1241 | std::array<std::array<u8, 128>, NumTransformFeedbackBuffers> tfb_varying_locs; | ||
| 1242 | |||
| 1243 | INSERT_UNION_PADDING_WORDS(0x298); | ||
| 1206 | 1244 | ||
| 1207 | struct { | 1245 | struct { |
| 1208 | /// Compressed address of a buffer that holds information about bound SSBOs. | 1246 | /// Compressed address of a buffer that holds information about bound SSBOs. |
| @@ -1428,6 +1466,8 @@ ASSERT_REG_POSITION(tess_mode, 0xC8); | |||
| 1428 | ASSERT_REG_POSITION(tess_level_outer, 0xC9); | 1466 | ASSERT_REG_POSITION(tess_level_outer, 0xC9); |
| 1429 | ASSERT_REG_POSITION(tess_level_inner, 0xCD); | 1467 | ASSERT_REG_POSITION(tess_level_inner, 0xCD); |
| 1430 | ASSERT_REG_POSITION(rasterize_enable, 0xDF); | 1468 | ASSERT_REG_POSITION(rasterize_enable, 0xDF); |
| 1469 | ASSERT_REG_POSITION(tfb_bindings, 0xE0); | ||
| 1470 | ASSERT_REG_POSITION(tfb_layouts, 0x1C0); | ||
| 1431 | ASSERT_REG_POSITION(tfb_enabled, 0x1D1); | 1471 | ASSERT_REG_POSITION(tfb_enabled, 0x1D1); |
| 1432 | ASSERT_REG_POSITION(rt, 0x200); | 1472 | ASSERT_REG_POSITION(rt, 0x200); |
| 1433 | ASSERT_REG_POSITION(viewport_transform, 0x280); | 1473 | ASSERT_REG_POSITION(viewport_transform, 0x280); |
| @@ -1526,6 +1566,7 @@ ASSERT_REG_POSITION(firmware, 0x8C0); | |||
| 1526 | ASSERT_REG_POSITION(const_buffer, 0x8E0); | 1566 | ASSERT_REG_POSITION(const_buffer, 0x8E0); |
| 1527 | ASSERT_REG_POSITION(cb_bind[0], 0x904); | 1567 | ASSERT_REG_POSITION(cb_bind[0], 0x904); |
| 1528 | ASSERT_REG_POSITION(tex_cb_index, 0x982); | 1568 | ASSERT_REG_POSITION(tex_cb_index, 0x982); |
| 1569 | ASSERT_REG_POSITION(tfb_varying_locs, 0xA00); | ||
| 1529 | ASSERT_REG_POSITION(ssbo_info, 0xD18); | 1570 | ASSERT_REG_POSITION(ssbo_info, 0xD18); |
| 1530 | ASSERT_REG_POSITION(tex_info_buffers.address[0], 0xD2A); | 1571 | ASSERT_REG_POSITION(tex_info_buffers.address[0], 0xD2A); |
| 1531 | ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F); | 1572 | ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F); |
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp index 6adef459e..f058f2744 100644 --- a/src/video_core/guest_driver.cpp +++ b/src/video_core/guest_driver.cpp | |||
| @@ -4,13 +4,15 @@ | |||
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <limits> | 6 | #include <limits> |
| 7 | #include <vector> | ||
| 7 | 8 | ||
| 9 | #include "common/common_types.h" | ||
| 8 | #include "video_core/guest_driver.h" | 10 | #include "video_core/guest_driver.h" |
| 9 | 11 | ||
| 10 | namespace VideoCore { | 12 | namespace VideoCore { |
| 11 | 13 | ||
| 12 | void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) { | 14 | void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) { |
| 13 | if (texture_handler_size_deduced) { | 15 | if (texture_handler_size) { |
| 14 | return; | 16 | return; |
| 15 | } | 17 | } |
| 16 | const std::size_t size = bound_offsets.size(); | 18 | const std::size_t size = bound_offsets.size(); |
| @@ -29,7 +31,6 @@ void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offse | |||
| 29 | if (min_val > 2) { | 31 | if (min_val > 2) { |
| 30 | return; | 32 | return; |
| 31 | } | 33 | } |
| 32 | texture_handler_size_deduced = true; | ||
| 33 | texture_handler_size = min_texture_handler_size * min_val; | 34 | texture_handler_size = min_texture_handler_size * min_val; |
| 34 | } | 35 | } |
| 35 | 36 | ||
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h index fc1917347..99450777e 100644 --- a/src/video_core/guest_driver.h +++ b/src/video_core/guest_driver.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <optional> | ||
| 7 | #include <vector> | 8 | #include <vector> |
| 8 | 9 | ||
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| @@ -17,25 +18,29 @@ namespace VideoCore { | |||
| 17 | */ | 18 | */ |
| 18 | class GuestDriverProfile { | 19 | class GuestDriverProfile { |
| 19 | public: | 20 | public: |
| 20 | void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets); | 21 | explicit GuestDriverProfile() = default; |
| 22 | explicit GuestDriverProfile(std::optional<u32> texture_handler_size) | ||
| 23 | : texture_handler_size{texture_handler_size} {} | ||
| 24 | |||
| 25 | void DeduceTextureHandlerSize(std::vector<u32> bound_offsets); | ||
| 21 | 26 | ||
| 22 | u32 GetTextureHandlerSize() const { | 27 | u32 GetTextureHandlerSize() const { |
| 23 | return texture_handler_size; | 28 | return texture_handler_size.value_or(default_texture_handler_size); |
| 24 | } | 29 | } |
| 25 | 30 | ||
| 26 | bool TextureHandlerSizeKnown() const { | 31 | bool IsTextureHandlerSizeKnown() const { |
| 27 | return texture_handler_size_deduced; | 32 | return texture_handler_size.has_value(); |
| 28 | } | 33 | } |
| 29 | 34 | ||
| 30 | private: | 35 | private: |
| 31 | // Minimum size of texture handler any driver can use. | 36 | // Minimum size of texture handler any driver can use. |
| 32 | static constexpr u32 min_texture_handler_size = 4; | 37 | static constexpr u32 min_texture_handler_size = 4; |
| 33 | // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily | 38 | |
| 34 | // use 4 bytes instead. Thus, certain drivers may squish the size. | 39 | // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead. |
| 40 | // Thus, certain drivers may squish the size. | ||
| 35 | static constexpr u32 default_texture_handler_size = 8; | 41 | static constexpr u32 default_texture_handler_size = 8; |
| 36 | 42 | ||
| 37 | u32 texture_handler_size = default_texture_handler_size; | 43 | std::optional<u32> texture_handler_size = default_texture_handler_size; |
| 38 | bool texture_handler_size_deduced = false; | ||
| 39 | }; | 44 | }; |
| 40 | 45 | ||
| 41 | } // namespace VideoCore | 46 | } // namespace VideoCore |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 3e4514b94..1a68e3caa 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -25,7 +25,6 @@ constexpr std::size_t NumQueryTypes = 1; | |||
| 25 | 25 | ||
| 26 | enum class LoadCallbackStage { | 26 | enum class LoadCallbackStage { |
| 27 | Prepare, | 27 | Prepare, |
| 28 | Decompile, | ||
| 29 | Build, | 28 | Build, |
| 30 | Complete, | 29 | Complete, |
| 31 | }; | 30 | }; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8e48a6482..1af4268a4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -28,7 +28,6 @@ | |||
| 28 | #include "video_core/renderer_opengl/gl_query_cache.h" | 28 | #include "video_core/renderer_opengl/gl_query_cache.h" |
| 29 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 29 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 31 | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||
| 32 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | 31 | #include "video_core/renderer_opengl/maxwell_to_gl.h" |
| 33 | #include "video_core/renderer_opengl/renderer_opengl.h" | 32 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| 34 | 33 | ||
| @@ -76,7 +75,7 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry | |||
| 76 | } | 75 | } |
| 77 | 76 | ||
| 78 | std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, | 77 | std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, |
| 79 | const GLShader::ConstBufferEntry& entry) { | 78 | const ConstBufferEntry& entry) { |
| 80 | if (!entry.IsIndirect()) { | 79 | if (!entry.IsIndirect()) { |
| 81 | return entry.GetSize(); | 80 | return entry.GetSize(); |
| 82 | } | 81 | } |
| @@ -272,9 +271,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 272 | SetupDrawTextures(stage, shader); | 271 | SetupDrawTextures(stage, shader); |
| 273 | SetupDrawImages(stage, shader); | 272 | SetupDrawImages(stage, shader); |
| 274 | 273 | ||
| 275 | const ProgramVariant variant(primitive_mode); | 274 | const GLuint program_handle = shader->GetHandle(); |
| 276 | const auto program_handle = shader->GetHandle(variant); | ||
| 277 | |||
| 278 | switch (program) { | 275 | switch (program) { |
| 279 | case Maxwell::ShaderProgram::VertexA: | 276 | case Maxwell::ShaderProgram::VertexA: |
| 280 | case Maxwell::ShaderProgram::VertexB: | 277 | case Maxwell::ShaderProgram::VertexB: |
| @@ -295,7 +292,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 295 | // When a clip distance is enabled but not set in the shader it crops parts of the screen | 292 | // When a clip distance is enabled but not set in the shader it crops parts of the screen |
| 296 | // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the | 293 | // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the |
| 297 | // clip distances only when it's written by a shader stage. | 294 | // clip distances only when it's written by a shader stage. |
| 298 | clip_distances |= shader->GetShaderEntries().clip_distances; | 295 | clip_distances |= shader->GetEntries().clip_distances; |
| 299 | 296 | ||
| 300 | // When VertexA is enabled, we have dual vertex shaders | 297 | // When VertexA is enabled, we have dual vertex shaders |
| 301 | if (program == Maxwell::ShaderProgram::VertexA) { | 298 | if (program == Maxwell::ShaderProgram::VertexA) { |
| @@ -499,7 +496,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 499 | SyncCullMode(); | 496 | SyncCullMode(); |
| 500 | SyncPrimitiveRestart(); | 497 | SyncPrimitiveRestart(); |
| 501 | SyncScissorTest(); | 498 | SyncScissorTest(); |
| 502 | SyncTransformFeedback(); | ||
| 503 | SyncPointState(); | 499 | SyncPointState(); |
| 504 | SyncPolygonOffset(); | 500 | SyncPolygonOffset(); |
| 505 | SyncAlphaTest(); | 501 | SyncAlphaTest(); |
| @@ -572,7 +568,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 572 | glTextureBarrier(); | 568 | glTextureBarrier(); |
| 573 | } | 569 | } |
| 574 | 570 | ||
| 575 | ++num_queued_commands; | 571 | BeginTransformFeedback(primitive_mode); |
| 576 | 572 | ||
| 577 | const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); | 573 | const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); |
| 578 | const GLsizei num_instances = | 574 | const GLsizei num_instances = |
| @@ -611,6 +607,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 611 | num_instances, base_instance); | 607 | num_instances, base_instance); |
| 612 | } | 608 | } |
| 613 | } | 609 | } |
| 610 | |||
| 611 | EndTransformFeedback(); | ||
| 612 | |||
| 613 | ++num_queued_commands; | ||
| 614 | } | 614 | } |
| 615 | 615 | ||
| 616 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | 616 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { |
| @@ -623,12 +623,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 623 | auto kernel = shader_cache.GetComputeKernel(code_addr); | 623 | auto kernel = shader_cache.GetComputeKernel(code_addr); |
| 624 | SetupComputeTextures(kernel); | 624 | SetupComputeTextures(kernel); |
| 625 | SetupComputeImages(kernel); | 625 | SetupComputeImages(kernel); |
| 626 | 626 | program_manager.BindComputeShader(kernel->GetHandle()); | |
| 627 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 628 | const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, | ||
| 629 | launch_desc.block_dim_z, launch_desc.shared_alloc, | ||
| 630 | launch_desc.local_pos_alloc); | ||
| 631 | program_manager.BindComputeShader(kernel->GetHandle(variant)); | ||
| 632 | 627 | ||
| 633 | const std::size_t buffer_size = | 628 | const std::size_t buffer_size = |
| 634 | Tegra::Engines::KeplerCompute::NumConstBuffers * | 629 | Tegra::Engines::KeplerCompute::NumConstBuffers * |
| @@ -646,6 +641,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 646 | bind_ubo_pushbuffer.Bind(); | 641 | bind_ubo_pushbuffer.Bind(); |
| 647 | bind_ssbo_pushbuffer.Bind(); | 642 | bind_ssbo_pushbuffer.Bind(); |
| 648 | 643 | ||
| 644 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||
| 649 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); | 645 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); |
| 650 | ++num_queued_commands; | 646 | ++num_queued_commands; |
| 651 | } | 647 | } |
| @@ -750,7 +746,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad | |||
| 750 | const auto& shader_stage = stages[stage_index]; | 746 | const auto& shader_stage = stages[stage_index]; |
| 751 | 747 | ||
| 752 | u32 binding = device.GetBaseBindings(stage_index).uniform_buffer; | 748 | u32 binding = device.GetBaseBindings(stage_index).uniform_buffer; |
| 753 | for (const auto& entry : shader->GetShaderEntries().const_buffers) { | 749 | for (const auto& entry : shader->GetEntries().const_buffers) { |
| 754 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; | 750 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; |
| 755 | SetupConstBuffer(binding++, buffer, entry); | 751 | SetupConstBuffer(binding++, buffer, entry); |
| 756 | } | 752 | } |
| @@ -761,7 +757,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { | |||
| 761 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | 757 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; |
| 762 | 758 | ||
| 763 | u32 binding = 0; | 759 | u32 binding = 0; |
| 764 | for (const auto& entry : kernel->GetShaderEntries().const_buffers) { | 760 | for (const auto& entry : kernel->GetEntries().const_buffers) { |
| 765 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | 761 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; |
| 766 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); | 762 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); |
| 767 | Tegra::Engines::ConstBufferInfo buffer; | 763 | Tegra::Engines::ConstBufferInfo buffer; |
| @@ -773,7 +769,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { | |||
| 773 | } | 769 | } |
| 774 | 770 | ||
| 775 | void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, | 771 | void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, |
| 776 | const GLShader::ConstBufferEntry& entry) { | 772 | const ConstBufferEntry& entry) { |
| 777 | if (!buffer.enabled) { | 773 | if (!buffer.enabled) { |
| 778 | // Set values to zero to unbind buffers | 774 | // Set values to zero to unbind buffers |
| 779 | bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, | 775 | bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, |
| @@ -797,7 +793,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad | |||
| 797 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; | 793 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; |
| 798 | 794 | ||
| 799 | u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; | 795 | u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; |
| 800 | for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { | 796 | for (const auto& entry : shader->GetEntries().global_memory_entries) { |
| 801 | const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; | 797 | const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; |
| 802 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; | 798 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; |
| 803 | const auto size{memory_manager.Read<u32>(addr + 8)}; | 799 | const auto size{memory_manager.Read<u32>(addr + 8)}; |
| @@ -811,7 +807,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { | |||
| 811 | const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; | 807 | const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; |
| 812 | 808 | ||
| 813 | u32 binding = 0; | 809 | u32 binding = 0; |
| 814 | for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { | 810 | for (const auto& entry : kernel->GetEntries().global_memory_entries) { |
| 815 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; | 811 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; |
| 816 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; | 812 | const auto gpu_addr{memory_manager.Read<u64>(addr)}; |
| 817 | const auto size{memory_manager.Read<u32>(addr + 8)}; | 813 | const auto size{memory_manager.Read<u32>(addr + 8)}; |
| @@ -819,7 +815,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { | |||
| 819 | } | 815 | } |
| 820 | } | 816 | } |
| 821 | 817 | ||
| 822 | void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, | 818 | void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, |
| 823 | GPUVAddr gpu_addr, std::size_t size) { | 819 | GPUVAddr gpu_addr, std::size_t size) { |
| 824 | const auto alignment{device.GetShaderStorageBufferAlignment()}; | 820 | const auto alignment{device.GetShaderStorageBufferAlignment()}; |
| 825 | const auto [ssbo, buffer_offset] = | 821 | const auto [ssbo, buffer_offset] = |
| @@ -831,7 +827,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& | |||
| 831 | MICROPROFILE_SCOPE(OpenGL_Texture); | 827 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| 832 | const auto& maxwell3d = system.GPU().Maxwell3D(); | 828 | const auto& maxwell3d = system.GPU().Maxwell3D(); |
| 833 | u32 binding = device.GetBaseBindings(stage_index).sampler; | 829 | u32 binding = device.GetBaseBindings(stage_index).sampler; |
| 834 | for (const auto& entry : shader->GetShaderEntries().samplers) { | 830 | for (const auto& entry : shader->GetEntries().samplers) { |
| 835 | const auto shader_type = static_cast<ShaderType>(stage_index); | 831 | const auto shader_type = static_cast<ShaderType>(stage_index); |
| 836 | for (std::size_t i = 0; i < entry.Size(); ++i) { | 832 | for (std::size_t i = 0; i < entry.Size(); ++i) { |
| 837 | const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); | 833 | const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); |
| @@ -844,7 +840,7 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { | |||
| 844 | MICROPROFILE_SCOPE(OpenGL_Texture); | 840 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| 845 | const auto& compute = system.GPU().KeplerCompute(); | 841 | const auto& compute = system.GPU().KeplerCompute(); |
| 846 | u32 binding = 0; | 842 | u32 binding = 0; |
| 847 | for (const auto& entry : kernel->GetShaderEntries().samplers) { | 843 | for (const auto& entry : kernel->GetEntries().samplers) { |
| 848 | for (std::size_t i = 0; i < entry.Size(); ++i) { | 844 | for (std::size_t i = 0; i < entry.Size(); ++i) { |
| 849 | const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i); | 845 | const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i); |
| 850 | SetupTexture(binding++, texture, entry); | 846 | SetupTexture(binding++, texture, entry); |
| @@ -853,7 +849,7 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { | |||
| 853 | } | 849 | } |
| 854 | 850 | ||
| 855 | void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, | 851 | void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, |
| 856 | const GLShader::SamplerEntry& entry) { | 852 | const SamplerEntry& entry) { |
| 857 | const auto view = texture_cache.GetTextureSurface(texture.tic, entry); | 853 | const auto view = texture_cache.GetTextureSurface(texture.tic, entry); |
| 858 | if (!view) { | 854 | if (!view) { |
| 859 | // Can occur when texture addr is null or its memory is unmapped/invalid | 855 | // Can occur when texture addr is null or its memory is unmapped/invalid |
| @@ -876,7 +872,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu | |||
| 876 | void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { | 872 | void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { |
| 877 | const auto& maxwell3d = system.GPU().Maxwell3D(); | 873 | const auto& maxwell3d = system.GPU().Maxwell3D(); |
| 878 | u32 binding = device.GetBaseBindings(stage_index).image; | 874 | u32 binding = device.GetBaseBindings(stage_index).image; |
| 879 | for (const auto& entry : shader->GetShaderEntries().images) { | 875 | for (const auto& entry : shader->GetEntries().images) { |
| 880 | const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); | 876 | const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); |
| 881 | const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; | 877 | const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; |
| 882 | SetupImage(binding++, tic, entry); | 878 | SetupImage(binding++, tic, entry); |
| @@ -886,14 +882,14 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh | |||
| 886 | void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { | 882 | void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { |
| 887 | const auto& compute = system.GPU().KeplerCompute(); | 883 | const auto& compute = system.GPU().KeplerCompute(); |
| 888 | u32 binding = 0; | 884 | u32 binding = 0; |
| 889 | for (const auto& entry : shader->GetShaderEntries().images) { | 885 | for (const auto& entry : shader->GetEntries().images) { |
| 890 | const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic; | 886 | const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic; |
| 891 | SetupImage(binding++, tic, entry); | 887 | SetupImage(binding++, tic, entry); |
| 892 | } | 888 | } |
| 893 | } | 889 | } |
| 894 | 890 | ||
| 895 | void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, | 891 | void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, |
| 896 | const GLShader::ImageEntry& entry) { | 892 | const ImageEntry& entry) { |
| 897 | const auto view = texture_cache.GetImageSurface(tic, entry); | 893 | const auto view = texture_cache.GetImageSurface(tic, entry); |
| 898 | if (!view) { | 894 | if (!view) { |
| 899 | glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); | 895 | glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); |
| @@ -1297,11 +1293,6 @@ void RasterizerOpenGL::SyncScissorTest() { | |||
| 1297 | } | 1293 | } |
| 1298 | } | 1294 | } |
| 1299 | 1295 | ||
| 1300 | void RasterizerOpenGL::SyncTransformFeedback() { | ||
| 1301 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1302 | UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented"); | ||
| 1303 | } | ||
| 1304 | |||
| 1305 | void RasterizerOpenGL::SyncPointState() { | 1296 | void RasterizerOpenGL::SyncPointState() { |
| 1306 | auto& gpu = system.GPU().Maxwell3D(); | 1297 | auto& gpu = system.GPU().Maxwell3D(); |
| 1307 | auto& flags = gpu.dirty.flags; | 1298 | auto& flags = gpu.dirty.flags; |
| @@ -1377,4 +1368,62 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { | |||
| 1377 | oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); | 1368 | oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); |
| 1378 | } | 1369 | } |
| 1379 | 1370 | ||
| 1371 | void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { | ||
| 1372 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1373 | if (regs.tfb_enabled == 0) { | ||
| 1374 | return; | ||
| 1375 | } | ||
| 1376 | |||
| 1377 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | ||
| 1378 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | ||
| 1379 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); | ||
| 1380 | |||
| 1381 | for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { | ||
| 1382 | const auto& binding = regs.tfb_bindings[index]; | ||
| 1383 | if (!binding.buffer_enable) { | ||
| 1384 | if (enabled_transform_feedback_buffers[index]) { | ||
| 1385 | glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0, | ||
| 1386 | 0); | ||
| 1387 | } | ||
| 1388 | enabled_transform_feedback_buffers[index] = false; | ||
| 1389 | continue; | ||
| 1390 | } | ||
| 1391 | enabled_transform_feedback_buffers[index] = true; | ||
| 1392 | |||
| 1393 | auto& tfb_buffer = transform_feedback_buffers[index]; | ||
| 1394 | tfb_buffer.Create(); | ||
| 1395 | |||
| 1396 | const GLuint handle = tfb_buffer.handle; | ||
| 1397 | const std::size_t size = binding.buffer_size; | ||
| 1398 | glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY); | ||
| 1399 | glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0, | ||
| 1400 | static_cast<GLsizeiptr>(size)); | ||
| 1401 | } | ||
| 1402 | |||
| 1403 | glBeginTransformFeedback(GL_POINTS); | ||
| 1404 | } | ||
| 1405 | |||
| 1406 | void RasterizerOpenGL::EndTransformFeedback() { | ||
| 1407 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1408 | if (regs.tfb_enabled == 0) { | ||
| 1409 | return; | ||
| 1410 | } | ||
| 1411 | |||
| 1412 | glEndTransformFeedback(); | ||
| 1413 | |||
| 1414 | for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { | ||
| 1415 | const auto& binding = regs.tfb_bindings[index]; | ||
| 1416 | if (!binding.buffer_enable) { | ||
| 1417 | continue; | ||
| 1418 | } | ||
| 1419 | UNIMPLEMENTED_IF(binding.buffer_offset != 0); | ||
| 1420 | |||
| 1421 | const GLuint handle = transform_feedback_buffers[index].handle; | ||
| 1422 | const GPUVAddr gpu_addr = binding.Address(); | ||
| 1423 | const std::size_t size = binding.buffer_size; | ||
| 1424 | const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | ||
| 1425 | glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); | ||
| 1426 | } | ||
| 1427 | } | ||
| 1428 | |||
| 1380 | } // namespace OpenGL | 1429 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index e5681d6df..2d3be2437 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -98,7 +98,7 @@ private: | |||
| 98 | 98 | ||
| 99 | /// Configures a constant buffer. | 99 | /// Configures a constant buffer. |
| 100 | void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, | 100 | void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, |
| 101 | const GLShader::ConstBufferEntry& entry); | 101 | const ConstBufferEntry& entry); |
| 102 | 102 | ||
| 103 | /// Configures the current global memory entries to use for the draw command. | 103 | /// Configures the current global memory entries to use for the draw command. |
| 104 | void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); | 104 | void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); |
| @@ -107,7 +107,7 @@ private: | |||
| 107 | void SetupComputeGlobalMemory(const Shader& kernel); | 107 | void SetupComputeGlobalMemory(const Shader& kernel); |
| 108 | 108 | ||
| 109 | /// Configures a constant buffer. | 109 | /// Configures a constant buffer. |
| 110 | void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | 110 | void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, |
| 111 | std::size_t size); | 111 | std::size_t size); |
| 112 | 112 | ||
| 113 | /// Configures the current textures to use for the draw command. | 113 | /// Configures the current textures to use for the draw command. |
| @@ -118,7 +118,7 @@ private: | |||
| 118 | 118 | ||
| 119 | /// Configures a texture. | 119 | /// Configures a texture. |
| 120 | void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, | 120 | void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, |
| 121 | const GLShader::SamplerEntry& entry); | 121 | const SamplerEntry& entry); |
| 122 | 122 | ||
| 123 | /// Configures images in a graphics shader. | 123 | /// Configures images in a graphics shader. |
| 124 | void SetupDrawImages(std::size_t stage_index, const Shader& shader); | 124 | void SetupDrawImages(std::size_t stage_index, const Shader& shader); |
| @@ -127,8 +127,7 @@ private: | |||
| 127 | void SetupComputeImages(const Shader& shader); | 127 | void SetupComputeImages(const Shader& shader); |
| 128 | 128 | ||
| 129 | /// Configures an image. | 129 | /// Configures an image. |
| 130 | void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, | 130 | void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); |
| 131 | const GLShader::ImageEntry& entry); | ||
| 132 | 131 | ||
| 133 | /// Syncs the viewport and depth range to match the guest state | 132 | /// Syncs the viewport and depth range to match the guest state |
| 134 | void SyncViewport(); | 133 | void SyncViewport(); |
| @@ -169,9 +168,6 @@ private: | |||
| 169 | /// Syncs the scissor test state to match the guest state | 168 | /// Syncs the scissor test state to match the guest state |
| 170 | void SyncScissorTest(); | 169 | void SyncScissorTest(); |
| 171 | 170 | ||
| 172 | /// Syncs the transform feedback state to match the guest state | ||
| 173 | void SyncTransformFeedback(); | ||
| 174 | |||
| 175 | /// Syncs the point state to match the guest state | 171 | /// Syncs the point state to match the guest state |
| 176 | void SyncPointState(); | 172 | void SyncPointState(); |
| 177 | 173 | ||
| @@ -193,6 +189,12 @@ private: | |||
| 193 | /// Syncs the framebuffer sRGB state to match the guest state | 189 | /// Syncs the framebuffer sRGB state to match the guest state |
| 194 | void SyncFramebufferSRGB(); | 190 | void SyncFramebufferSRGB(); |
| 195 | 191 | ||
| 192 | /// Begin a transform feedback | ||
| 193 | void BeginTransformFeedback(GLenum primitive_mode); | ||
| 194 | |||
| 195 | /// End a transform feedback | ||
| 196 | void EndTransformFeedback(); | ||
| 197 | |||
| 196 | /// Check for extension that are not strictly required but are needed for correct emulation | 198 | /// Check for extension that are not strictly required but are needed for correct emulation |
| 197 | void CheckExtensions(); | 199 | void CheckExtensions(); |
| 198 | 200 | ||
| @@ -230,6 +232,11 @@ private: | |||
| 230 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; | 232 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; |
| 231 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; | 233 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; |
| 232 | 234 | ||
| 235 | std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | ||
| 236 | transform_feedback_buffers; | ||
| 237 | std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | ||
| 238 | enabled_transform_feedback_buffers; | ||
| 239 | |||
| 233 | /// Number of commands queued to the OpenGL driver. Reseted on flush. | 240 | /// Number of commands queued to the OpenGL driver. Reseted on flush. |
| 234 | std::size_t num_queued_commands = 0; | 241 | std::size_t num_queued_commands = 0; |
| 235 | 242 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4cb89db8c..e3d31c3eb 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -2,12 +2,16 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <atomic> | ||
| 6 | #include <functional> | ||
| 5 | #include <mutex> | 7 | #include <mutex> |
| 6 | #include <optional> | 8 | #include <optional> |
| 7 | #include <string> | 9 | #include <string> |
| 8 | #include <thread> | 10 | #include <thread> |
| 9 | #include <unordered_set> | 11 | #include <unordered_set> |
| 12 | |||
| 10 | #include <boost/functional/hash.hpp> | 13 | #include <boost/functional/hash.hpp> |
| 14 | |||
| 11 | #include "common/alignment.h" | 15 | #include "common/alignment.h" |
| 12 | #include "common/assert.h" | 16 | #include "common/assert.h" |
| 13 | #include "common/logging/log.h" | 17 | #include "common/logging/log.h" |
| @@ -24,13 +28,14 @@ | |||
| 24 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | 28 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 29 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 26 | #include "video_core/renderer_opengl/utils.h" | 30 | #include "video_core/renderer_opengl/utils.h" |
| 31 | #include "video_core/shader/registry.h" | ||
| 27 | #include "video_core/shader/shader_ir.h" | 32 | #include "video_core/shader/shader_ir.h" |
| 28 | 33 | ||
| 29 | namespace OpenGL { | 34 | namespace OpenGL { |
| 30 | 35 | ||
| 31 | using Tegra::Engines::ShaderType; | 36 | using Tegra::Engines::ShaderType; |
| 32 | using VideoCommon::Shader::ConstBufferLocker; | ||
| 33 | using VideoCommon::Shader::ProgramCode; | 37 | using VideoCommon::Shader::ProgramCode; |
| 38 | using VideoCommon::Shader::Registry; | ||
| 34 | using VideoCommon::Shader::ShaderIR; | 39 | using VideoCommon::Shader::ShaderIR; |
| 35 | 40 | ||
| 36 | namespace { | 41 | namespace { |
| @@ -56,7 +61,7 @@ constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { | |||
| 56 | } | 61 | } |
| 57 | 62 | ||
| 58 | /// Calculates the size of a program stream | 63 | /// Calculates the size of a program stream |
| 59 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | 64 | std::size_t CalculateProgramSize(const ProgramCode& program) { |
| 60 | constexpr std::size_t start_offset = 10; | 65 | constexpr std::size_t start_offset = 10; |
| 61 | // This is the encoded version of BRA that jumps to itself. All Nvidia | 66 | // This is the encoded version of BRA that jumps to itself. All Nvidia |
| 62 | // shaders end with one. | 67 | // shaders end with one. |
| @@ -109,32 +114,9 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) { | |||
| 109 | } | 114 | } |
| 110 | } | 115 | } |
| 111 | 116 | ||
| 112 | /// Describes primitive behavior on geometry shaders | ||
| 113 | constexpr std::pair<const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { | ||
| 114 | switch (primitive_mode) { | ||
| 115 | case GL_POINTS: | ||
| 116 | return {"points", 1}; | ||
| 117 | case GL_LINES: | ||
| 118 | case GL_LINE_STRIP: | ||
| 119 | return {"lines", 2}; | ||
| 120 | case GL_LINES_ADJACENCY: | ||
| 121 | case GL_LINE_STRIP_ADJACENCY: | ||
| 122 | return {"lines_adjacency", 4}; | ||
| 123 | case GL_TRIANGLES: | ||
| 124 | case GL_TRIANGLE_STRIP: | ||
| 125 | case GL_TRIANGLE_FAN: | ||
| 126 | return {"triangles", 3}; | ||
| 127 | case GL_TRIANGLES_ADJACENCY: | ||
| 128 | case GL_TRIANGLE_STRIP_ADJACENCY: | ||
| 129 | return {"triangles_adjacency", 6}; | ||
| 130 | default: | ||
| 131 | return {"points", 1}; | ||
| 132 | } | ||
| 133 | } | ||
| 134 | |||
| 135 | /// Hashes one (or two) program streams | 117 | /// Hashes one (or two) program streams |
| 136 | u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code, | 118 | u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code, |
| 137 | const ProgramCode& code_b) { | 119 | const ProgramCode& code_b = {}) { |
| 138 | u64 unique_identifier = boost::hash_value(code); | 120 | u64 unique_identifier = boost::hash_value(code); |
| 139 | if (is_a) { | 121 | if (is_a) { |
| 140 | // VertexA programs include two programs | 122 | // VertexA programs include two programs |
| @@ -143,24 +125,6 @@ u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& co | |||
| 143 | return unique_identifier; | 125 | return unique_identifier; |
| 144 | } | 126 | } |
| 145 | 127 | ||
| 146 | /// Creates an unspecialized program from code streams | ||
| 147 | std::string GenerateGLSL(const Device& device, ShaderType shader_type, const ShaderIR& ir, | ||
| 148 | const std::optional<ShaderIR>& ir_b) { | ||
| 149 | switch (shader_type) { | ||
| 150 | case ShaderType::Vertex: | ||
| 151 | return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr); | ||
| 152 | case ShaderType::Geometry: | ||
| 153 | return GLShader::GenerateGeometryShader(device, ir); | ||
| 154 | case ShaderType::Fragment: | ||
| 155 | return GLShader::GenerateFragmentShader(device, ir); | ||
| 156 | case ShaderType::Compute: | ||
| 157 | return GLShader::GenerateComputeShader(device, ir); | ||
| 158 | default: | ||
| 159 | UNIMPLEMENTED_MSG("Unimplemented shader_type={}", static_cast<u32>(shader_type)); | ||
| 160 | return {}; | ||
| 161 | } | ||
| 162 | } | ||
| 163 | |||
| 164 | constexpr const char* GetShaderTypeName(ShaderType shader_type) { | 128 | constexpr const char* GetShaderTypeName(ShaderType shader_type) { |
| 165 | switch (shader_type) { | 129 | switch (shader_type) { |
| 166 | case ShaderType::Vertex: | 130 | case ShaderType::Vertex: |
| @@ -196,102 +160,38 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { | |||
| 196 | return {}; | 160 | return {}; |
| 197 | } | 161 | } |
| 198 | 162 | ||
| 199 | std::string GetShaderId(u64 unique_identifier, ShaderType shader_type) { | 163 | std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { |
| 200 | return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); | 164 | return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); |
| 201 | } | 165 | } |
| 202 | 166 | ||
| 203 | Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(Core::System& system, | 167 | std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { |
| 204 | ShaderType shader_type) { | 168 | const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; |
| 205 | if (shader_type == ShaderType::Compute) { | 169 | const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, |
| 206 | return system.GPU().KeplerCompute(); | 170 | entry.graphics_info, entry.compute_info}; |
| 207 | } else { | 171 | const auto registry = std::make_shared<Registry>(entry.type, info); |
| 208 | return system.GPU().Maxwell3D(); | 172 | for (const auto& [address, value] : entry.keys) { |
| 209 | } | 173 | const auto [buffer, offset] = address; |
| 210 | } | 174 | registry->InsertKey(buffer, offset, value); |
| 211 | |||
| 212 | std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType shader_type) { | ||
| 213 | return std::make_unique<ConstBufferLocker>(shader_type, | ||
| 214 | GetConstBufferEngineInterface(system, shader_type)); | ||
| 215 | } | ||
| 216 | |||
| 217 | void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { | ||
| 218 | locker.SetBoundBuffer(usage.bound_buffer); | ||
| 219 | for (const auto& key : usage.keys) { | ||
| 220 | const auto [buffer, offset] = key.first; | ||
| 221 | locker.InsertKey(buffer, offset, key.second); | ||
| 222 | } | 175 | } |
| 223 | for (const auto& [offset, sampler] : usage.bound_samplers) { | 176 | for (const auto& [offset, sampler] : entry.bound_samplers) { |
| 224 | locker.InsertBoundSampler(offset, sampler); | 177 | registry->InsertBoundSampler(offset, sampler); |
| 225 | } | 178 | } |
| 226 | for (const auto& [key, sampler] : usage.bindless_samplers) { | 179 | for (const auto& [key, sampler] : entry.bindless_samplers) { |
| 227 | const auto [buffer, offset] = key; | 180 | const auto [buffer, offset] = key; |
| 228 | locker.InsertBindlessSampler(buffer, offset, sampler); | 181 | registry->InsertBindlessSampler(buffer, offset, sampler); |
| 229 | } | 182 | } |
| 183 | return registry; | ||
| 230 | } | 184 | } |
| 231 | 185 | ||
| 232 | CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderType shader_type, | 186 | std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type, |
| 233 | const ProgramCode& code, const ProgramCode& code_b, | 187 | u64 unique_identifier, const ShaderIR& ir, |
| 234 | ConstBufferLocker& locker, const ProgramVariant& variant, | 188 | const Registry& registry, bool hint_retrievable = false) { |
| 235 | bool hint_retrievable = false) { | 189 | const std::string shader_id = MakeShaderID(unique_identifier, shader_type); |
| 236 | LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, shader_type)); | 190 | LOG_INFO(Render_OpenGL, "{}", shader_id); |
| 237 | |||
| 238 | const bool is_compute = shader_type == ShaderType::Compute; | ||
| 239 | const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | ||
| 240 | const ShaderIR ir(code, main_offset, COMPILER_SETTINGS, locker); | ||
| 241 | std::optional<ShaderIR> ir_b; | ||
| 242 | if (!code_b.empty()) { | ||
| 243 | ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker); | ||
| 244 | } | ||
| 245 | |||
| 246 | std::string source = fmt::format(R"(// {} | ||
| 247 | #version 430 core | ||
| 248 | #extension GL_ARB_separate_shader_objects : enable | ||
| 249 | )", | ||
| 250 | GetShaderId(unique_identifier, shader_type)); | ||
| 251 | if (device.HasShaderBallot()) { | ||
| 252 | source += "#extension GL_ARB_shader_ballot : require\n"; | ||
| 253 | } | ||
| 254 | if (device.HasVertexViewportLayer()) { | ||
| 255 | source += "#extension GL_ARB_shader_viewport_layer_array : require\n"; | ||
| 256 | } | ||
| 257 | if (device.HasImageLoadFormatted()) { | ||
| 258 | source += "#extension GL_EXT_shader_image_load_formatted : require\n"; | ||
| 259 | } | ||
| 260 | if (device.HasWarpIntrinsics()) { | ||
| 261 | source += "#extension GL_NV_gpu_shader5 : require\n" | ||
| 262 | "#extension GL_NV_shader_thread_group : require\n" | ||
| 263 | "#extension GL_NV_shader_thread_shuffle : require\n"; | ||
| 264 | } | ||
| 265 | // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 operations) | ||
| 266 | // on places where we don't want to. | ||
| 267 | // Thanks to Ryujinx for finding this workaround. | ||
| 268 | source += "#pragma optionNV(fastmath off)\n"; | ||
| 269 | |||
| 270 | if (shader_type == ShaderType::Geometry) { | ||
| 271 | const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(variant.primitive_mode); | ||
| 272 | source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices); | ||
| 273 | source += fmt::format("layout ({}) in;\n", glsl_topology); | ||
| 274 | } | ||
| 275 | if (shader_type == ShaderType::Compute) { | ||
| 276 | if (variant.local_memory_size > 0) { | ||
| 277 | source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n", | ||
| 278 | Common::AlignUp(variant.local_memory_size, 4) / 4); | ||
| 279 | } | ||
| 280 | source += | ||
| 281 | fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n", | ||
| 282 | variant.block_x, variant.block_y, variant.block_z); | ||
| 283 | |||
| 284 | if (variant.shared_memory_size > 0) { | ||
| 285 | // shared_memory_size is described in number of words | ||
| 286 | source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size); | ||
| 287 | } | ||
| 288 | } | ||
| 289 | |||
| 290 | source += '\n'; | ||
| 291 | source += GenerateGLSL(device, shader_type, ir, ir_b); | ||
| 292 | 191 | ||
| 192 | const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); | ||
| 293 | OGLShader shader; | 193 | OGLShader shader; |
| 294 | shader.Create(source.c_str(), GetGLShaderType(shader_type)); | 194 | shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); |
| 295 | 195 | ||
| 296 | auto program = std::make_shared<OGLProgram>(); | 196 | auto program = std::make_shared<OGLProgram>(); |
| 297 | program->Create(true, hint_retrievable, shader.handle); | 197 | program->Create(true, hint_retrievable, shader.handle); |
| @@ -299,7 +199,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp | |||
| 299 | } | 199 | } |
| 300 | 200 | ||
| 301 | std::unordered_set<GLenum> GetSupportedFormats() { | 201 | std::unordered_set<GLenum> GetSupportedFormats() { |
| 302 | GLint num_formats{}; | 202 | GLint num_formats; |
| 303 | glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); | 203 | glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); |
| 304 | 204 | ||
| 305 | std::vector<GLint> formats(num_formats); | 205 | std::vector<GLint> formats(num_formats); |
| @@ -314,115 +214,82 @@ std::unordered_set<GLenum> GetSupportedFormats() { | |||
| 314 | 214 | ||
| 315 | } // Anonymous namespace | 215 | } // Anonymous namespace |
| 316 | 216 | ||
| 317 | CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type, | 217 | CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, |
| 318 | GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b) | 218 | std::shared_ptr<VideoCommon::Shader::Registry> registry, |
| 319 | : RasterizerCacheObject{params.host_ptr}, system{params.system}, | 219 | ShaderEntries entries, std::shared_ptr<OGLProgram> program) |
| 320 | disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, | 220 | : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)}, |
| 321 | unique_identifier{params.unique_identifier}, shader_type{shader_type}, | 221 | cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {} |
| 322 | entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} { | 222 | |
| 323 | if (!params.precompiled_variants) { | 223 | CachedShader::~CachedShader() = default; |
| 324 | return; | 224 | |
| 325 | } | 225 | GLuint CachedShader::GetHandle() const { |
| 326 | for (const auto& pair : *params.precompiled_variants) { | 226 | DEBUG_ASSERT(registry->IsConsistent()); |
| 327 | auto locker = MakeLocker(system, shader_type); | 227 | return program->handle; |
| 328 | const auto& usage = pair->first; | ||
| 329 | FillLocker(*locker, usage); | ||
| 330 | |||
| 331 | std::unique_ptr<LockerVariant>* locker_variant = nullptr; | ||
| 332 | const auto it = | ||
| 333 | std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) { | ||
| 334 | return variant->locker->HasEqualKeys(*locker); | ||
| 335 | }); | ||
| 336 | if (it == locker_variants.end()) { | ||
| 337 | locker_variant = &locker_variants.emplace_back(); | ||
| 338 | *locker_variant = std::make_unique<LockerVariant>(); | ||
| 339 | locker_variant->get()->locker = std::move(locker); | ||
| 340 | } else { | ||
| 341 | locker_variant = &*it; | ||
| 342 | } | ||
| 343 | locker_variant->get()->programs.emplace(usage.variant, pair->second); | ||
| 344 | } | ||
| 345 | } | 228 | } |
| 346 | 229 | ||
| 347 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | 230 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, |
| 348 | Maxwell::ShaderProgram program_type, ProgramCode code, | 231 | Maxwell::ShaderProgram program_type, ProgramCode code, |
| 349 | ProgramCode code_b) { | 232 | ProgramCode code_b) { |
| 350 | const auto shader_type = GetShaderType(program_type); | 233 | const auto shader_type = GetShaderType(program_type); |
| 351 | params.disk_cache.SaveRaw( | 234 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 352 | ShaderDiskCacheRaw(params.unique_identifier, shader_type, code, code_b)); | ||
| 353 | 235 | ||
| 354 | ConstBufferLocker locker(shader_type, params.system.GPU().Maxwell3D()); | 236 | auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D()); |
| 355 | const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); | 237 | const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); |
| 356 | // TODO(Rodrigo): Handle VertexA shaders | 238 | // TODO(Rodrigo): Handle VertexA shaders |
| 357 | // std::optional<ShaderIR> ir_b; | 239 | // std::optional<ShaderIR> ir_b; |
| 358 | // if (!code_b.empty()) { | 240 | // if (!code_b.empty()) { |
| 359 | // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); | 241 | // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); |
| 360 | // } | 242 | // } |
| 361 | return std::shared_ptr<CachedShader>(new CachedShader( | 243 | auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); |
| 362 | params, shader_type, GLShader::GetEntries(ir), std::move(code), std::move(code_b))); | 244 | |
| 245 | ShaderDiskCacheEntry entry; | ||
| 246 | entry.type = shader_type; | ||
| 247 | entry.code = std::move(code); | ||
| 248 | entry.code_b = std::move(code_b); | ||
| 249 | entry.unique_identifier = params.unique_identifier; | ||
| 250 | entry.bound_buffer = registry->GetBoundBuffer(); | ||
| 251 | entry.graphics_info = registry->GetGraphicsInfo(); | ||
| 252 | entry.keys = registry->GetKeys(); | ||
| 253 | entry.bound_samplers = registry->GetBoundSamplers(); | ||
| 254 | entry.bindless_samplers = registry->GetBindlessSamplers(); | ||
| 255 | params.disk_cache.SaveEntry(std::move(entry)); | ||
| 256 | |||
| 257 | return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, | ||
| 258 | size_in_bytes, std::move(registry), | ||
| 259 | MakeEntries(ir), std::move(program))); | ||
| 363 | } | 260 | } |
| 364 | 261 | ||
| 365 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { | 262 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { |
| 366 | params.disk_cache.SaveRaw( | 263 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 367 | ShaderDiskCacheRaw(params.unique_identifier, ShaderType::Compute, code)); | 264 | |
| 368 | 265 | auto& engine = params.system.GPU().KeplerCompute(); | |
| 369 | ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute, | 266 | auto registry = std::make_shared<Registry>(ShaderType::Compute, engine); |
| 370 | params.system.GPU().KeplerCompute()); | 267 | const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); |
| 371 | const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); | 268 | const u64 uid = params.unique_identifier; |
| 372 | return std::shared_ptr<CachedShader>(new CachedShader( | 269 | auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); |
| 373 | params, ShaderType::Compute, GLShader::GetEntries(ir), std::move(code), {})); | 270 | |
| 271 | ShaderDiskCacheEntry entry; | ||
| 272 | entry.type = ShaderType::Compute; | ||
| 273 | entry.code = std::move(code); | ||
| 274 | entry.unique_identifier = uid; | ||
| 275 | entry.bound_buffer = registry->GetBoundBuffer(); | ||
| 276 | entry.compute_info = registry->GetComputeInfo(); | ||
| 277 | entry.keys = registry->GetKeys(); | ||
| 278 | entry.bound_samplers = registry->GetBoundSamplers(); | ||
| 279 | entry.bindless_samplers = registry->GetBindlessSamplers(); | ||
| 280 | params.disk_cache.SaveEntry(std::move(entry)); | ||
| 281 | |||
| 282 | return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, | ||
| 283 | size_in_bytes, std::move(registry), | ||
| 284 | MakeEntries(ir), std::move(program))); | ||
| 374 | } | 285 | } |
| 375 | 286 | ||
| 376 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, | 287 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, |
| 377 | const UnspecializedShader& unspecialized) { | 288 | const PrecompiledShader& precompiled_shader, |
| 378 | return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.type, | 289 | std::size_t size_in_bytes) { |
| 379 | unspecialized.entries, unspecialized.code, | 290 | return std::shared_ptr<CachedShader>(new CachedShader( |
| 380 | unspecialized.code_b)); | 291 | params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry, |
| 381 | } | 292 | precompiled_shader.entries, precompiled_shader.program)); |
| 382 | |||
| 383 | GLuint CachedShader::GetHandle(const ProgramVariant& variant) { | ||
| 384 | EnsureValidLockerVariant(); | ||
| 385 | |||
| 386 | const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant); | ||
| 387 | auto& program = entry->second; | ||
| 388 | if (!is_cache_miss) { | ||
| 389 | return program->handle; | ||
| 390 | } | ||
| 391 | |||
| 392 | program = BuildShader(device, unique_identifier, shader_type, code, code_b, | ||
| 393 | *curr_locker_variant->locker, variant); | ||
| 394 | disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker)); | ||
| 395 | |||
| 396 | LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); | ||
| 397 | return program->handle; | ||
| 398 | } | ||
| 399 | |||
| 400 | bool CachedShader::EnsureValidLockerVariant() { | ||
| 401 | const auto previous_variant = curr_locker_variant; | ||
| 402 | if (curr_locker_variant && !curr_locker_variant->locker->IsConsistent()) { | ||
| 403 | curr_locker_variant = nullptr; | ||
| 404 | } | ||
| 405 | if (!curr_locker_variant) { | ||
| 406 | for (auto& variant : locker_variants) { | ||
| 407 | if (variant->locker->IsConsistent()) { | ||
| 408 | curr_locker_variant = variant.get(); | ||
| 409 | } | ||
| 410 | } | ||
| 411 | } | ||
| 412 | if (!curr_locker_variant) { | ||
| 413 | auto& new_variant = locker_variants.emplace_back(); | ||
| 414 | new_variant = std::make_unique<LockerVariant>(); | ||
| 415 | new_variant->locker = MakeLocker(system, shader_type); | ||
| 416 | curr_locker_variant = new_variant.get(); | ||
| 417 | } | ||
| 418 | return previous_variant == curr_locker_variant; | ||
| 419 | } | ||
| 420 | |||
| 421 | ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, | ||
| 422 | const ConstBufferLocker& locker) const { | ||
| 423 | return ShaderDiskCacheUsage{unique_identifier, variant, | ||
| 424 | locker.GetBoundBuffer(), locker.GetKeys(), | ||
| 425 | locker.GetBoundSamplers(), locker.GetBindlessSamplers()}; | ||
| 426 | } | 293 | } |
| 427 | 294 | ||
| 428 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, | 295 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, |
| @@ -432,16 +299,12 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& | |||
| 432 | 299 | ||
| 433 | void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | 300 | void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, |
| 434 | const VideoCore::DiskResourceLoadCallback& callback) { | 301 | const VideoCore::DiskResourceLoadCallback& callback) { |
| 435 | const auto transferable = disk_cache.LoadTransferable(); | 302 | const std::optional transferable = disk_cache.LoadTransferable(); |
| 436 | if (!transferable) { | 303 | if (!transferable) { |
| 437 | return; | 304 | return; |
| 438 | } | 305 | } |
| 439 | const auto [raws, shader_usages] = *transferable; | ||
| 440 | if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) { | ||
| 441 | return; | ||
| 442 | } | ||
| 443 | 306 | ||
| 444 | const auto dumps = disk_cache.LoadPrecompiled(); | 307 | const std::vector gl_cache = disk_cache.LoadPrecompiled(); |
| 445 | const auto supported_formats = GetSupportedFormats(); | 308 | const auto supported_formats = GetSupportedFormats(); |
| 446 | 309 | ||
| 447 | // Track if precompiled cache was altered during loading to know if we have to | 310 | // Track if precompiled cache was altered during loading to know if we have to |
| @@ -450,77 +313,82 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 450 | 313 | ||
| 451 | // Inform the frontend about shader build initialization | 314 | // Inform the frontend about shader build initialization |
| 452 | if (callback) { | 315 | if (callback) { |
| 453 | callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size()); | 316 | callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size()); |
| 454 | } | 317 | } |
| 455 | 318 | ||
| 456 | std::mutex mutex; | 319 | std::mutex mutex; |
| 457 | std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex | 320 | std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex |
| 458 | std::atomic_bool compilation_failed = false; | 321 | std::atomic_bool gl_cache_failed = false; |
| 459 | 322 | ||
| 460 | const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, | 323 | const auto find_precompiled = [&gl_cache](u64 id) { |
| 461 | std::size_t end, const std::vector<ShaderDiskCacheUsage>& shader_usages, | 324 | return std::find_if(gl_cache.begin(), gl_cache.end(), |
| 462 | const ShaderDumpsMap& dumps) { | 325 | [id](const auto& entry) { return entry.unique_identifier == id; }); |
| 326 | }; | ||
| 327 | |||
| 328 | const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, | ||
| 329 | std::size_t end) { | ||
| 463 | context->MakeCurrent(); | 330 | context->MakeCurrent(); |
| 464 | SCOPE_EXIT({ return context->DoneCurrent(); }); | 331 | SCOPE_EXIT({ return context->DoneCurrent(); }); |
| 465 | 332 | ||
| 466 | for (std::size_t i = begin; i < end; ++i) { | 333 | for (std::size_t i = begin; i < end; ++i) { |
| 467 | if (stop_loading || compilation_failed) { | 334 | if (stop_loading) { |
| 468 | return; | 335 | return; |
| 469 | } | 336 | } |
| 470 | const auto& usage{shader_usages[i]}; | 337 | const auto& entry = (*transferable)[i]; |
| 471 | const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; | 338 | const u64 uid = entry.unique_identifier; |
| 472 | const auto dump{dumps.find(usage)}; | 339 | const auto it = find_precompiled(uid); |
| 473 | 340 | const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; | |
| 474 | CachedProgram shader; | 341 | |
| 475 | if (dump != dumps.end()) { | 342 | const bool is_compute = entry.type == ShaderType::Compute; |
| 476 | // If the shader is dumped, attempt to load it with | 343 | const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; |
| 477 | shader = GeneratePrecompiledProgram(dump->second, supported_formats); | 344 | auto registry = MakeRegistry(entry); |
| 478 | if (!shader) { | 345 | const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); |
| 479 | compilation_failed = true; | 346 | |
| 480 | return; | 347 | std::shared_ptr<OGLProgram> program; |
| 348 | if (precompiled_entry) { | ||
| 349 | // If the shader is precompiled, attempt to load it with | ||
| 350 | program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); | ||
| 351 | if (!program) { | ||
| 352 | gl_cache_failed = true; | ||
| 481 | } | 353 | } |
| 482 | } | 354 | } |
| 483 | if (!shader) { | 355 | if (!program) { |
| 484 | auto locker{MakeLocker(system, unspecialized.type)}; | 356 | // Otherwise compile it from GLSL |
| 485 | FillLocker(*locker, usage); | 357 | program = BuildShader(device, entry.type, uid, ir, *registry, true); |
| 486 | |||
| 487 | shader = BuildShader(device, usage.unique_identifier, unspecialized.type, | ||
| 488 | unspecialized.code, unspecialized.code_b, *locker, | ||
| 489 | usage.variant, true); | ||
| 490 | } | 358 | } |
| 491 | 359 | ||
| 360 | PrecompiledShader shader; | ||
| 361 | shader.program = std::move(program); | ||
| 362 | shader.registry = std::move(registry); | ||
| 363 | shader.entries = MakeEntries(ir); | ||
| 364 | |||
| 492 | std::scoped_lock lock{mutex}; | 365 | std::scoped_lock lock{mutex}; |
| 493 | if (callback) { | 366 | if (callback) { |
| 494 | callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, | 367 | callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, |
| 495 | shader_usages.size()); | 368 | transferable->size()); |
| 496 | } | 369 | } |
| 497 | 370 | runtime_cache.emplace(entry.unique_identifier, std::move(shader)); | |
| 498 | precompiled_programs.emplace(usage, std::move(shader)); | ||
| 499 | |||
| 500 | // TODO(Rodrigo): Is there a better way to do this? | ||
| 501 | precompiled_variants[usage.unique_identifier].push_back( | ||
| 502 | precompiled_programs.find(usage)); | ||
| 503 | } | 371 | } |
| 504 | }; | 372 | }; |
| 505 | 373 | ||
| 506 | const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)}; | 374 | const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)}; |
| 507 | const std::size_t bucket_size{shader_usages.size() / num_workers}; | 375 | const std::size_t bucket_size{transferable->size() / num_workers}; |
| 508 | std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); | 376 | std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); |
| 509 | std::vector<std::thread> threads(num_workers); | 377 | std::vector<std::thread> threads(num_workers); |
| 510 | for (std::size_t i = 0; i < num_workers; ++i) { | 378 | for (std::size_t i = 0; i < num_workers; ++i) { |
| 511 | const bool is_last_worker = i + 1 == num_workers; | 379 | const bool is_last_worker = i + 1 == num_workers; |
| 512 | const std::size_t start{bucket_size * i}; | 380 | const std::size_t start{bucket_size * i}; |
| 513 | const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size}; | 381 | const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size}; |
| 514 | 382 | ||
| 515 | // On some platforms the shared context has to be created from the GUI thread | 383 | // On some platforms the shared context has to be created from the GUI thread |
| 516 | contexts[i] = emu_window.CreateSharedContext(); | 384 | contexts[i] = emu_window.CreateSharedContext(); |
| 517 | threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps); | 385 | threads[i] = std::thread(worker, contexts[i].get(), start, end); |
| 518 | } | 386 | } |
| 519 | for (auto& thread : threads) { | 387 | for (auto& thread : threads) { |
| 520 | thread.join(); | 388 | thread.join(); |
| 521 | } | 389 | } |
| 522 | 390 | ||
| 523 | if (compilation_failed) { | 391 | if (gl_cache_failed) { |
| 524 | // Invalidate the precompiled cache if a shader dumped shader was rejected | 392 | // Invalidate the precompiled cache if a shader dumped shader was rejected |
| 525 | disk_cache.InvalidatePrecompiled(); | 393 | disk_cache.InvalidatePrecompiled(); |
| 526 | precompiled_cache_altered = true; | 394 | precompiled_cache_altered = true; |
| @@ -533,11 +401,12 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 533 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw | 401 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw |
| 534 | // before precompiling them | 402 | // before precompiling them |
| 535 | 403 | ||
| 536 | for (std::size_t i = 0; i < shader_usages.size(); ++i) { | 404 | for (std::size_t i = 0; i < transferable->size(); ++i) { |
| 537 | const auto& usage{shader_usages[i]}; | 405 | const u64 id = (*transferable)[i].unique_identifier; |
| 538 | if (dumps.find(usage) == dumps.end()) { | 406 | const auto it = find_precompiled(id); |
| 539 | const auto& program{precompiled_programs.at(usage)}; | 407 | if (it == gl_cache.end()) { |
| 540 | disk_cache.SaveDump(usage, program->handle); | 408 | const GLuint program = runtime_cache.at(id).program->handle; |
| 409 | disk_cache.SavePrecompiled(id, program); | ||
| 541 | precompiled_cache_altered = true; | 410 | precompiled_cache_altered = true; |
| 542 | } | 411 | } |
| 543 | } | 412 | } |
| @@ -547,80 +416,29 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 547 | } | 416 | } |
| 548 | } | 417 | } |
| 549 | 418 | ||
| 550 | const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const { | 419 | std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram( |
| 551 | const auto it = precompiled_variants.find(unique_identifier); | 420 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, |
| 552 | return it == precompiled_variants.end() ? nullptr : &it->second; | 421 | const std::unordered_set<GLenum>& supported_formats) { |
| 553 | } | 422 | if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { |
| 554 | 423 | LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); | |
| 555 | CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( | ||
| 556 | const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) { | ||
| 557 | if (supported_formats.find(dump.binary_format) == supported_formats.end()) { | ||
| 558 | LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); | ||
| 559 | return {}; | 424 | return {}; |
| 560 | } | 425 | } |
| 561 | 426 | ||
| 562 | CachedProgram shader = std::make_shared<OGLProgram>(); | 427 | auto program = std::make_shared<OGLProgram>(); |
| 563 | shader->handle = glCreateProgram(); | 428 | program->handle = glCreateProgram(); |
| 564 | glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); | 429 | glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); |
| 565 | glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(), | 430 | glProgramBinary(program->handle, precompiled_entry.binary_format, |
| 566 | static_cast<GLsizei>(dump.binary.size())); | 431 | precompiled_entry.binary.data(), |
| 567 | 432 | static_cast<GLsizei>(precompiled_entry.binary.size())); | |
| 568 | GLint link_status{}; | 433 | |
| 569 | glGetProgramiv(shader->handle, GL_LINK_STATUS, &link_status); | 434 | GLint link_status; |
| 435 | glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status); | ||
| 570 | if (link_status == GL_FALSE) { | 436 | if (link_status == GL_FALSE) { |
| 571 | LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing"); | 437 | LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); |
| 572 | return {}; | 438 | return {}; |
| 573 | } | 439 | } |
| 574 | 440 | ||
| 575 | return shader; | 441 | return program; |
| 576 | } | ||
| 577 | |||
| 578 | bool ShaderCacheOpenGL::GenerateUnspecializedShaders( | ||
| 579 | const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, | ||
| 580 | const std::vector<ShaderDiskCacheRaw>& raws) { | ||
| 581 | if (callback) { | ||
| 582 | callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); | ||
| 583 | } | ||
| 584 | |||
| 585 | for (std::size_t i = 0; i < raws.size(); ++i) { | ||
| 586 | if (stop_loading) { | ||
| 587 | return false; | ||
| 588 | } | ||
| 589 | const auto& raw{raws[i]}; | ||
| 590 | const u64 unique_identifier{raw.GetUniqueIdentifier()}; | ||
| 591 | const u64 calculated_hash{ | ||
| 592 | GetUniqueIdentifier(raw.GetType(), raw.HasProgramA(), raw.GetCode(), raw.GetCodeB())}; | ||
| 593 | if (unique_identifier != calculated_hash) { | ||
| 594 | LOG_ERROR(Render_OpenGL, | ||
| 595 | "Invalid hash in entry={:016x} (obtained hash={:016x}) - " | ||
| 596 | "removing shader cache", | ||
| 597 | raw.GetUniqueIdentifier(), calculated_hash); | ||
| 598 | disk_cache.InvalidateTransferable(); | ||
| 599 | return false; | ||
| 600 | } | ||
| 601 | |||
| 602 | const u32 main_offset = | ||
| 603 | raw.GetType() == ShaderType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | ||
| 604 | ConstBufferLocker locker(raw.GetType()); | ||
| 605 | const ShaderIR ir(raw.GetCode(), main_offset, COMPILER_SETTINGS, locker); | ||
| 606 | // TODO(Rodrigo): Handle VertexA shaders | ||
| 607 | // std::optional<ShaderIR> ir_b; | ||
| 608 | // if (raw.HasProgramA()) { | ||
| 609 | // ir_b.emplace(raw.GetProgramCodeB(), main_offset); | ||
| 610 | // } | ||
| 611 | |||
| 612 | UnspecializedShader unspecialized; | ||
| 613 | unspecialized.entries = GLShader::GetEntries(ir); | ||
| 614 | unspecialized.type = raw.GetType(); | ||
| 615 | unspecialized.code = raw.GetCode(); | ||
| 616 | unspecialized.code_b = raw.GetCodeB(); | ||
| 617 | unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized); | ||
| 618 | |||
| 619 | if (callback) { | ||
| 620 | callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); | ||
| 621 | } | ||
| 622 | } | ||
| 623 | return true; | ||
| 624 | } | 442 | } |
| 625 | 443 | ||
| 626 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | 444 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { |
| @@ -648,17 +466,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 648 | 466 | ||
| 649 | const auto unique_identifier = GetUniqueIdentifier( | 467 | const auto unique_identifier = GetUniqueIdentifier( |
| 650 | GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); | 468 | GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); |
| 651 | const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); | ||
| 652 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; | 469 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; |
| 653 | const ShaderParameters params{system, disk_cache, precompiled_variants, device, | 470 | const ShaderParameters params{system, disk_cache, device, |
| 654 | cpu_addr, host_ptr, unique_identifier}; | 471 | cpu_addr, host_ptr, unique_identifier}; |
| 655 | 472 | ||
| 656 | const auto found = unspecialized_shaders.find(unique_identifier); | 473 | const auto found = runtime_cache.find(unique_identifier); |
| 657 | if (found == unspecialized_shaders.end()) { | 474 | if (found == runtime_cache.end()) { |
| 658 | shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), | 475 | shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), |
| 659 | std::move(code_b)); | 476 | std::move(code_b)); |
| 660 | } else { | 477 | } else { |
| 661 | shader = CachedShader::CreateFromCache(params, found->second); | 478 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 479 | shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes); | ||
| 662 | } | 480 | } |
| 663 | Register(shader); | 481 | Register(shader); |
| 664 | 482 | ||
| @@ -673,19 +491,19 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | |||
| 673 | return kernel; | 491 | return kernel; |
| 674 | } | 492 | } |
| 675 | 493 | ||
| 676 | // No kernel found - create a new one | 494 | // No kernel found, create a new one |
| 677 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; | 495 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; |
| 678 | const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code, {})}; | 496 | const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; |
| 679 | const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); | ||
| 680 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; | 497 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; |
| 681 | const ShaderParameters params{system, disk_cache, precompiled_variants, device, | 498 | const ShaderParameters params{system, disk_cache, device, |
| 682 | cpu_addr, host_ptr, unique_identifier}; | 499 | cpu_addr, host_ptr, unique_identifier}; |
| 683 | 500 | ||
| 684 | const auto found = unspecialized_shaders.find(unique_identifier); | 501 | const auto found = runtime_cache.find(unique_identifier); |
| 685 | if (found == unspecialized_shaders.end()) { | 502 | if (found == runtime_cache.end()) { |
| 686 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); | 503 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); |
| 687 | } else { | 504 | } else { |
| 688 | kernel = CachedShader::CreateFromCache(params, found->second); | 505 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 506 | kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes); | ||
| 689 | } | 507 | } |
| 690 | 508 | ||
| 691 | Register(kernel); | 509 | Register(kernel); |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 7b1470db3..4935019fc 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 22 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 23 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 23 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 24 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | 24 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" |
| 25 | #include "video_core/shader/const_buffer_locker.h" | 25 | #include "video_core/shader/registry.h" |
| 26 | #include "video_core/shader/shader_ir.h" | 26 | #include "video_core/shader/shader_ir.h" |
| 27 | 27 | ||
| 28 | namespace Core { | 28 | namespace Core { |
| @@ -41,22 +41,17 @@ class RasterizerOpenGL; | |||
| 41 | struct UnspecializedShader; | 41 | struct UnspecializedShader; |
| 42 | 42 | ||
| 43 | using Shader = std::shared_ptr<CachedShader>; | 43 | using Shader = std::shared_ptr<CachedShader>; |
| 44 | using CachedProgram = std::shared_ptr<OGLProgram>; | ||
| 45 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 44 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 46 | using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; | 45 | |
| 47 | using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>; | 46 | struct PrecompiledShader { |
| 48 | 47 | std::shared_ptr<OGLProgram> program; | |
| 49 | struct UnspecializedShader { | 48 | std::shared_ptr<VideoCommon::Shader::Registry> registry; |
| 50 | GLShader::ShaderEntries entries; | 49 | ShaderEntries entries; |
| 51 | Tegra::Engines::ShaderType type; | ||
| 52 | ProgramCode code; | ||
| 53 | ProgramCode code_b; | ||
| 54 | }; | 50 | }; |
| 55 | 51 | ||
| 56 | struct ShaderParameters { | 52 | struct ShaderParameters { |
| 57 | Core::System& system; | 53 | Core::System& system; |
| 58 | ShaderDiskCacheOpenGL& disk_cache; | 54 | ShaderDiskCacheOpenGL& disk_cache; |
| 59 | const PrecompiledVariants* precompiled_variants; | ||
| 60 | const Device& device; | 55 | const Device& device; |
| 61 | VAddr cpu_addr; | 56 | VAddr cpu_addr; |
| 62 | u8* host_ptr; | 57 | u8* host_ptr; |
| @@ -65,61 +60,45 @@ struct ShaderParameters { | |||
| 65 | 60 | ||
| 66 | class CachedShader final : public RasterizerCacheObject { | 61 | class CachedShader final : public RasterizerCacheObject { |
| 67 | public: | 62 | public: |
| 68 | static Shader CreateStageFromMemory(const ShaderParameters& params, | 63 | ~CachedShader(); |
| 69 | Maxwell::ShaderProgram program_type, | ||
| 70 | ProgramCode program_code, ProgramCode program_code_b); | ||
| 71 | static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); | ||
| 72 | 64 | ||
| 73 | static Shader CreateFromCache(const ShaderParameters& params, | 65 | /// Gets the GL program handle for the shader |
| 74 | const UnspecializedShader& unspecialized); | 66 | GLuint GetHandle() const; |
| 75 | 67 | ||
| 68 | /// Returns the guest CPU address of the shader | ||
| 76 | VAddr GetCpuAddr() const override { | 69 | VAddr GetCpuAddr() const override { |
| 77 | return cpu_addr; | 70 | return cpu_addr; |
| 78 | } | 71 | } |
| 79 | 72 | ||
| 73 | /// Returns the size in bytes of the shader | ||
| 80 | std::size_t GetSizeInBytes() const override { | 74 | std::size_t GetSizeInBytes() const override { |
| 81 | return code.size() * sizeof(u64); | 75 | return size_in_bytes; |
| 82 | } | 76 | } |
| 83 | 77 | ||
| 84 | /// Gets the shader entries for the shader | 78 | /// Gets the shader entries for the shader |
| 85 | const GLShader::ShaderEntries& GetShaderEntries() const { | 79 | const ShaderEntries& GetEntries() const { |
| 86 | return entries; | 80 | return entries; |
| 87 | } | 81 | } |
| 88 | 82 | ||
| 89 | /// Gets the GL program handle for the shader | 83 | static Shader CreateStageFromMemory(const ShaderParameters& params, |
| 90 | GLuint GetHandle(const ProgramVariant& variant); | 84 | Maxwell::ShaderProgram program_type, |
| 91 | 85 | ProgramCode program_code, ProgramCode program_code_b); | |
| 92 | private: | 86 | static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); |
| 93 | struct LockerVariant { | ||
| 94 | std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker; | ||
| 95 | std::unordered_map<ProgramVariant, CachedProgram> programs; | ||
| 96 | }; | ||
| 97 | |||
| 98 | explicit CachedShader(const ShaderParameters& params, Tegra::Engines::ShaderType shader_type, | ||
| 99 | GLShader::ShaderEntries entries, ProgramCode program_code, | ||
| 100 | ProgramCode program_code_b); | ||
| 101 | |||
| 102 | bool EnsureValidLockerVariant(); | ||
| 103 | |||
| 104 | ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant, | ||
| 105 | const VideoCommon::Shader::ConstBufferLocker& locker) const; | ||
| 106 | |||
| 107 | Core::System& system; | ||
| 108 | ShaderDiskCacheOpenGL& disk_cache; | ||
| 109 | const Device& device; | ||
| 110 | |||
| 111 | VAddr cpu_addr{}; | ||
| 112 | |||
| 113 | u64 unique_identifier{}; | ||
| 114 | Tegra::Engines::ShaderType shader_type{}; | ||
| 115 | |||
| 116 | GLShader::ShaderEntries entries; | ||
| 117 | 87 | ||
| 118 | ProgramCode code; | 88 | static Shader CreateFromCache(const ShaderParameters& params, |
| 119 | ProgramCode code_b; | 89 | const PrecompiledShader& precompiled_shader, |
| 90 | std::size_t size_in_bytes); | ||
| 120 | 91 | ||
| 121 | LockerVariant* curr_locker_variant = nullptr; | 92 | private: |
| 122 | std::vector<std::unique_ptr<LockerVariant>> locker_variants; | 93 | explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, |
| 94 | std::shared_ptr<VideoCommon::Shader::Registry> registry, | ||
| 95 | ShaderEntries entries, std::shared_ptr<OGLProgram> program); | ||
| 96 | |||
| 97 | std::shared_ptr<VideoCommon::Shader::Registry> registry; | ||
| 98 | ShaderEntries entries; | ||
| 99 | VAddr cpu_addr = 0; | ||
| 100 | std::size_t size_in_bytes = 0; | ||
| 101 | std::shared_ptr<OGLProgram> program; | ||
| 123 | }; | 102 | }; |
| 124 | 103 | ||
| 125 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { | 104 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { |
| @@ -142,25 +121,15 @@ protected: | |||
| 142 | void FlushObjectInner(const Shader& object) override {} | 121 | void FlushObjectInner(const Shader& object) override {} |
| 143 | 122 | ||
| 144 | private: | 123 | private: |
| 145 | bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading, | 124 | std::shared_ptr<OGLProgram> GeneratePrecompiledProgram( |
| 146 | const VideoCore::DiskResourceLoadCallback& callback, | 125 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, |
| 147 | const std::vector<ShaderDiskCacheRaw>& raws); | 126 | const std::unordered_set<GLenum>& supported_formats); |
| 148 | |||
| 149 | CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, | ||
| 150 | const std::unordered_set<GLenum>& supported_formats); | ||
| 151 | |||
| 152 | const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const; | ||
| 153 | 127 | ||
| 154 | Core::System& system; | 128 | Core::System& system; |
| 155 | Core::Frontend::EmuWindow& emu_window; | 129 | Core::Frontend::EmuWindow& emu_window; |
| 156 | const Device& device; | 130 | const Device& device; |
| 157 | |||
| 158 | ShaderDiskCacheOpenGL disk_cache; | 131 | ShaderDiskCacheOpenGL disk_cache; |
| 159 | 132 | std::unordered_map<u64, PrecompiledShader> runtime_cache; | |
| 160 | PrecompiledPrograms precompiled_programs; | ||
| 161 | std::unordered_map<u64, PrecompiledVariants> precompiled_variants; | ||
| 162 | |||
| 163 | std::unordered_map<u64, UnspecializedShader> unspecialized_shaders; | ||
| 164 | 133 | ||
| 165 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | 134 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; |
| 166 | }; | 135 | }; |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 3a41ed30c..3adf7f0cb 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -23,8 +23,9 @@ | |||
| 23 | #include "video_core/shader/ast.h" | 23 | #include "video_core/shader/ast.h" |
| 24 | #include "video_core/shader/node.h" | 24 | #include "video_core/shader/node.h" |
| 25 | #include "video_core/shader/shader_ir.h" | 25 | #include "video_core/shader/shader_ir.h" |
| 26 | #include "video_core/shader/transform_feedback.h" | ||
| 26 | 27 | ||
| 27 | namespace OpenGL::GLShader { | 28 | namespace OpenGL { |
| 28 | 29 | ||
| 29 | namespace { | 30 | namespace { |
| 30 | 31 | ||
| @@ -36,6 +37,8 @@ using Tegra::Shader::IpaInterpMode; | |||
| 36 | using Tegra::Shader::IpaMode; | 37 | using Tegra::Shader::IpaMode; |
| 37 | using Tegra::Shader::IpaSampleMode; | 38 | using Tegra::Shader::IpaSampleMode; |
| 38 | using Tegra::Shader::Register; | 39 | using Tegra::Shader::Register; |
| 40 | using VideoCommon::Shader::BuildTransformFeedback; | ||
| 41 | using VideoCommon::Shader::Registry; | ||
| 39 | 42 | ||
| 40 | using namespace std::string_literals; | 43 | using namespace std::string_literals; |
| 41 | using namespace VideoCommon::Shader; | 44 | using namespace VideoCommon::Shader; |
| @@ -48,6 +51,11 @@ class ExprDecompiler; | |||
| 48 | 51 | ||
| 49 | enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; | 52 | enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; |
| 50 | 53 | ||
| 54 | constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"}; | ||
| 55 | |||
| 56 | constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr"; | ||
| 57 | constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr"; | ||
| 58 | |||
| 51 | struct TextureOffset {}; | 59 | struct TextureOffset {}; |
| 52 | struct TextureDerivates {}; | 60 | struct TextureDerivates {}; |
| 53 | using TextureArgument = std::pair<Type, Node>; | 61 | using TextureArgument = std::pair<Type, Node>; |
| @@ -56,6 +64,25 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument> | |||
| 56 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = | 64 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = |
| 57 | static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); | 65 | static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); |
| 58 | 66 | ||
| 67 | constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt | ||
| 68 | #define ftou floatBitsToUint | ||
| 69 | #define itof intBitsToFloat | ||
| 70 | #define utof uintBitsToFloat | ||
| 71 | |||
| 72 | bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{ | ||
| 73 | bvec2 is_nan1 = isnan(pair1); | ||
| 74 | bvec2 is_nan2 = isnan(pair2); | ||
| 75 | return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); | ||
| 76 | }} | ||
| 77 | |||
| 78 | const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); | ||
| 79 | const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); | ||
| 80 | |||
| 81 | layout (std140, binding = {}) uniform vs_config {{ | ||
| 82 | float y_direction; | ||
| 83 | }}; | ||
| 84 | )"; | ||
| 85 | |||
| 59 | class ShaderWriter final { | 86 | class ShaderWriter final { |
| 60 | public: | 87 | public: |
| 61 | void AddExpression(std::string_view text) { | 88 | void AddExpression(std::string_view text) { |
| @@ -269,12 +296,41 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { | |||
| 269 | } | 296 | } |
| 270 | } | 297 | } |
| 271 | 298 | ||
| 299 | /// Describes primitive behavior on geometry shaders | ||
| 300 | std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) { | ||
| 301 | switch (topology) { | ||
| 302 | case Maxwell::PrimitiveTopology::Points: | ||
| 303 | return {"points", 1}; | ||
| 304 | case Maxwell::PrimitiveTopology::Lines: | ||
| 305 | case Maxwell::PrimitiveTopology::LineStrip: | ||
| 306 | return {"lines", 2}; | ||
| 307 | case Maxwell::PrimitiveTopology::LinesAdjacency: | ||
| 308 | case Maxwell::PrimitiveTopology::LineStripAdjacency: | ||
| 309 | return {"lines_adjacency", 4}; | ||
| 310 | case Maxwell::PrimitiveTopology::Triangles: | ||
| 311 | case Maxwell::PrimitiveTopology::TriangleStrip: | ||
| 312 | case Maxwell::PrimitiveTopology::TriangleFan: | ||
| 313 | return {"triangles", 3}; | ||
| 314 | case Maxwell::PrimitiveTopology::TrianglesAdjacency: | ||
| 315 | case Maxwell::PrimitiveTopology::TriangleStripAdjacency: | ||
| 316 | return {"triangles_adjacency", 6}; | ||
| 317 | default: | ||
| 318 | UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology)); | ||
| 319 | return {"points", 1}; | ||
| 320 | } | ||
| 321 | } | ||
| 322 | |||
| 272 | /// Generates code to use for a swizzle operation. | 323 | /// Generates code to use for a swizzle operation. |
| 273 | constexpr const char* GetSwizzle(u32 element) { | 324 | constexpr const char* GetSwizzle(std::size_t element) { |
| 274 | constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; | 325 | constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; |
| 275 | return swizzle.at(element); | 326 | return swizzle.at(element); |
| 276 | } | 327 | } |
| 277 | 328 | ||
| 329 | constexpr const char* GetColorSwizzle(std::size_t element) { | ||
| 330 | constexpr std::array swizzle = {".r", ".g", ".b", ".a"}; | ||
| 331 | return swizzle.at(element); | ||
| 332 | } | ||
| 333 | |||
| 278 | /// Translate topology | 334 | /// Translate topology |
| 279 | std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | 335 | std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { |
| 280 | switch (topology) { | 336 | switch (topology) { |
| @@ -341,11 +397,66 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
| 341 | return stage == ShaderType::Vertex; | 397 | return stage == ShaderType::Vertex; |
| 342 | } | 398 | } |
| 343 | 399 | ||
| 400 | struct GenericVaryingDescription { | ||
| 401 | std::string name; | ||
| 402 | u8 first_element = 0; | ||
| 403 | bool is_scalar = false; | ||
| 404 | }; | ||
| 405 | |||
| 344 | class GLSLDecompiler final { | 406 | class GLSLDecompiler final { |
| 345 | public: | 407 | public: |
| 346 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, | 408 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, |
| 347 | std::string suffix) | 409 | ShaderType stage, std::string_view identifier, std::string_view suffix) |
| 348 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} | 410 | : device{device}, ir{ir}, registry{registry}, stage{stage}, |
| 411 | identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} { | ||
| 412 | if (stage != ShaderType::Compute) { | ||
| 413 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); | ||
| 414 | } | ||
| 415 | } | ||
| 416 | |||
| 417 | void Decompile() { | ||
| 418 | DeclareHeader(); | ||
| 419 | DeclareVertex(); | ||
| 420 | DeclareGeometry(); | ||
| 421 | DeclareFragment(); | ||
| 422 | DeclareCompute(); | ||
| 423 | DeclareInputAttributes(); | ||
| 424 | DeclareOutputAttributes(); | ||
| 425 | DeclareImages(); | ||
| 426 | DeclareSamplers(); | ||
| 427 | DeclareGlobalMemory(); | ||
| 428 | DeclareConstantBuffers(); | ||
| 429 | DeclareLocalMemory(); | ||
| 430 | DeclareRegisters(); | ||
| 431 | DeclarePredicates(); | ||
| 432 | DeclareInternalFlags(); | ||
| 433 | DeclareCustomVariables(); | ||
| 434 | DeclarePhysicalAttributeReader(); | ||
| 435 | |||
| 436 | code.AddLine("void main() {{"); | ||
| 437 | ++code.scope; | ||
| 438 | |||
| 439 | if (stage == ShaderType::Vertex) { | ||
| 440 | code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); | ||
| 441 | } | ||
| 442 | |||
| 443 | if (ir.IsDecompiled()) { | ||
| 444 | DecompileAST(); | ||
| 445 | } else { | ||
| 446 | DecompileBranchMode(); | ||
| 447 | } | ||
| 448 | |||
| 449 | --code.scope; | ||
| 450 | code.AddLine("}}"); | ||
| 451 | } | ||
| 452 | |||
| 453 | std::string GetResult() { | ||
| 454 | return code.GetResult(); | ||
| 455 | } | ||
| 456 | |||
| 457 | private: | ||
| 458 | friend class ASTDecompiler; | ||
| 459 | friend class ExprDecompiler; | ||
| 349 | 460 | ||
| 350 | void DecompileBranchMode() { | 461 | void DecompileBranchMode() { |
| 351 | // VM's program counter | 462 | // VM's program counter |
| @@ -387,43 +498,36 @@ public: | |||
| 387 | 498 | ||
| 388 | void DecompileAST(); | 499 | void DecompileAST(); |
| 389 | 500 | ||
| 390 | void Decompile() { | 501 | void DeclareHeader() { |
| 391 | DeclareVertex(); | 502 | if (!identifier.empty()) { |
| 392 | DeclareGeometry(); | 503 | code.AddLine("// {}", identifier); |
| 393 | DeclareRegisters(); | 504 | } |
| 394 | DeclareCustomVariables(); | 505 | code.AddLine("#version 440 core"); |
| 395 | DeclarePredicates(); | 506 | code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); |
| 396 | DeclareLocalMemory(); | 507 | if (device.HasShaderBallot()) { |
| 397 | DeclareInternalFlags(); | 508 | code.AddLine("#extension GL_ARB_shader_ballot : require"); |
| 398 | DeclareInputAttributes(); | ||
| 399 | DeclareOutputAttributes(); | ||
| 400 | DeclareConstantBuffers(); | ||
| 401 | DeclareGlobalMemory(); | ||
| 402 | DeclareSamplers(); | ||
| 403 | DeclareImages(); | ||
| 404 | DeclarePhysicalAttributeReader(); | ||
| 405 | |||
| 406 | code.AddLine("void execute_{}() {{", suffix); | ||
| 407 | ++code.scope; | ||
| 408 | |||
| 409 | if (ir.IsDecompiled()) { | ||
| 410 | DecompileAST(); | ||
| 411 | } else { | ||
| 412 | DecompileBranchMode(); | ||
| 413 | } | 509 | } |
| 510 | if (device.HasVertexViewportLayer()) { | ||
| 511 | code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require"); | ||
| 512 | } | ||
| 513 | if (device.HasImageLoadFormatted()) { | ||
| 514 | code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); | ||
| 515 | } | ||
| 516 | if (device.HasWarpIntrinsics()) { | ||
| 517 | code.AddLine("#extension GL_NV_gpu_shader5 : require"); | ||
| 518 | code.AddLine("#extension GL_NV_shader_thread_group : require"); | ||
| 519 | code.AddLine("#extension GL_NV_shader_thread_shuffle : require"); | ||
| 520 | } | ||
| 521 | // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 | ||
| 522 | // operations) on places where we don't want to. | ||
| 523 | // Thanks to Ryujinx for finding this workaround. | ||
| 524 | code.AddLine("#pragma optionNV(fastmath off)"); | ||
| 414 | 525 | ||
| 415 | --code.scope; | 526 | code.AddNewLine(); |
| 416 | code.AddLine("}}"); | ||
| 417 | } | ||
| 418 | 527 | ||
| 419 | std::string GetResult() { | 528 | code.AddLine(CommonDeclarations, EmulationUniformBlockBinding); |
| 420 | return code.GetResult(); | ||
| 421 | } | 529 | } |
| 422 | 530 | ||
| 423 | private: | ||
| 424 | friend class ASTDecompiler; | ||
| 425 | friend class ExprDecompiler; | ||
| 426 | |||
| 427 | void DeclareVertex() { | 531 | void DeclareVertex() { |
| 428 | if (!IsVertexShader(stage)) | 532 | if (!IsVertexShader(stage)) |
| 429 | return; | 533 | return; |
| @@ -436,9 +540,15 @@ private: | |||
| 436 | return; | 540 | return; |
| 437 | } | 541 | } |
| 438 | 542 | ||
| 543 | const auto& info = registry.GetGraphicsInfo(); | ||
| 544 | const auto input_topology = info.primitive_topology; | ||
| 545 | const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology); | ||
| 546 | max_input_vertices = max_vertices; | ||
| 547 | code.AddLine("layout ({}) in;", glsl_topology); | ||
| 548 | |||
| 439 | const auto topology = GetTopologyName(header.common3.output_topology); | 549 | const auto topology = GetTopologyName(header.common3.output_topology); |
| 440 | const auto max_vertices = header.common4.max_output_vertices.Value(); | 550 | const auto max_output_vertices = header.common4.max_output_vertices.Value(); |
| 441 | code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices); | 551 | code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices); |
| 442 | code.AddNewLine(); | 552 | code.AddNewLine(); |
| 443 | 553 | ||
| 444 | code.AddLine("in gl_PerVertex {{"); | 554 | code.AddLine("in gl_PerVertex {{"); |
| @@ -450,11 +560,40 @@ private: | |||
| 450 | DeclareVertexRedeclarations(); | 560 | DeclareVertexRedeclarations(); |
| 451 | } | 561 | } |
| 452 | 562 | ||
| 563 | void DeclareFragment() { | ||
| 564 | if (stage != ShaderType::Fragment) { | ||
| 565 | return; | ||
| 566 | } | ||
| 567 | for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { | ||
| 568 | code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt); | ||
| 569 | } | ||
| 570 | } | ||
| 571 | |||
| 572 | void DeclareCompute() { | ||
| 573 | if (stage != ShaderType::Compute) { | ||
| 574 | return; | ||
| 575 | } | ||
| 576 | const auto& info = registry.GetComputeInfo(); | ||
| 577 | if (const u32 size = info.shared_memory_size_in_words; size > 0) { | ||
| 578 | code.AddLine("shared uint smem[{}];", size); | ||
| 579 | code.AddNewLine(); | ||
| 580 | } | ||
| 581 | code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", | ||
| 582 | info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); | ||
| 583 | code.AddNewLine(); | ||
| 584 | } | ||
| 585 | |||
| 453 | void DeclareVertexRedeclarations() { | 586 | void DeclareVertexRedeclarations() { |
| 454 | code.AddLine("out gl_PerVertex {{"); | 587 | code.AddLine("out gl_PerVertex {{"); |
| 455 | ++code.scope; | 588 | ++code.scope; |
| 456 | 589 | ||
| 457 | code.AddLine("vec4 gl_Position;"); | 590 | auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position); |
| 591 | if (!pos_xfb.empty()) { | ||
| 592 | pos_xfb = fmt::format("layout ({}) ", pos_xfb); | ||
| 593 | } | ||
| 594 | const char* pos_type = | ||
| 595 | FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1); | ||
| 596 | code.AddLine("{}{} gl_Position;", pos_xfb, pos_type); | ||
| 458 | 597 | ||
| 459 | for (const auto attribute : ir.GetOutputAttributes()) { | 598 | for (const auto attribute : ir.GetOutputAttributes()) { |
| 460 | if (attribute == Attribute::Index::ClipDistances0123 || | 599 | if (attribute == Attribute::Index::ClipDistances0123 || |
| @@ -525,18 +664,16 @@ private: | |||
| 525 | } | 664 | } |
| 526 | 665 | ||
| 527 | void DeclareLocalMemory() { | 666 | void DeclareLocalMemory() { |
| 667 | u64 local_memory_size = 0; | ||
| 528 | if (stage == ShaderType::Compute) { | 668 | if (stage == ShaderType::Compute) { |
| 529 | code.AddLine("#ifdef LOCAL_MEMORY_SIZE"); | 669 | local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; |
| 530 | code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory()); | 670 | } else { |
| 531 | code.AddLine("#endif"); | 671 | local_memory_size = header.GetLocalMemorySize(); |
| 532 | return; | ||
| 533 | } | 672 | } |
| 534 | |||
| 535 | const u64 local_memory_size = header.GetLocalMemorySize(); | ||
| 536 | if (local_memory_size == 0) { | 673 | if (local_memory_size == 0) { |
| 537 | return; | 674 | return; |
| 538 | } | 675 | } |
| 539 | const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; | 676 | const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4; |
| 540 | code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); | 677 | code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); |
| 541 | code.AddNewLine(); | 678 | code.AddNewLine(); |
| 542 | } | 679 | } |
| @@ -589,7 +726,7 @@ private: | |||
| 589 | void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { | 726 | void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { |
| 590 | const u32 location{GetGenericAttributeIndex(index)}; | 727 | const u32 location{GetGenericAttributeIndex(index)}; |
| 591 | 728 | ||
| 592 | std::string name{GetInputAttribute(index)}; | 729 | std::string name{GetGenericInputAttribute(index)}; |
| 593 | if (stage == ShaderType::Geometry) { | 730 | if (stage == ShaderType::Geometry) { |
| 594 | name = "gs_" + name + "[]"; | 731 | name = "gs_" + name + "[]"; |
| 595 | } | 732 | } |
| @@ -626,9 +763,59 @@ private: | |||
| 626 | } | 763 | } |
| 627 | } | 764 | } |
| 628 | 765 | ||
| 766 | std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const { | ||
| 767 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 768 | const auto it = transform_feedback.find(location); | ||
| 769 | if (it == transform_feedback.end()) { | ||
| 770 | return {}; | ||
| 771 | } | ||
| 772 | return it->second.components; | ||
| 773 | } | ||
| 774 | |||
| 775 | std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const { | ||
| 776 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 777 | const auto it = transform_feedback.find(location); | ||
| 778 | if (it == transform_feedback.end()) { | ||
| 779 | return {}; | ||
| 780 | } | ||
| 781 | |||
| 782 | const VaryingTFB& tfb = it->second; | ||
| 783 | return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer, | ||
| 784 | tfb.offset, tfb.stride); | ||
| 785 | } | ||
| 786 | |||
| 629 | void DeclareOutputAttribute(Attribute::Index index) { | 787 | void DeclareOutputAttribute(Attribute::Index index) { |
| 630 | const u32 location{GetGenericAttributeIndex(index)}; | 788 | static constexpr std::string_view swizzle = "xyzw"; |
| 631 | code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index)); | 789 | u8 element = 0; |
| 790 | while (element < 4) { | ||
| 791 | auto xfb = GetTransformFeedbackDecoration(index, element); | ||
| 792 | if (!xfb.empty()) { | ||
| 793 | xfb = fmt::format(", {}", xfb); | ||
| 794 | } | ||
| 795 | const std::size_t remainder = 4 - element; | ||
| 796 | const std::size_t num_components = GetNumComponents(index, element).value_or(remainder); | ||
| 797 | const char* const type = FLOAT_TYPES.at(num_components - 1); | ||
| 798 | |||
| 799 | const u32 location = GetGenericAttributeIndex(index); | ||
| 800 | |||
| 801 | GenericVaryingDescription description; | ||
| 802 | description.first_element = static_cast<u8>(element); | ||
| 803 | description.is_scalar = num_components == 1; | ||
| 804 | description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME); | ||
| 805 | if (element != 0 || num_components != 4) { | ||
| 806 | const std::string_view name_swizzle = swizzle.substr(element, num_components); | ||
| 807 | description.name = fmt::format("{}_{}", description.name, name_swizzle); | ||
| 808 | } | ||
| 809 | for (std::size_t i = 0; i < num_components; ++i) { | ||
| 810 | const u8 offset = static_cast<u8>(location * 4 + element + i); | ||
| 811 | varying_description.insert({offset, description}); | ||
| 812 | } | ||
| 813 | |||
| 814 | code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element, | ||
| 815 | xfb, type, description.name); | ||
| 816 | |||
| 817 | element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); | ||
| 818 | } | ||
| 632 | } | 819 | } |
| 633 | 820 | ||
| 634 | void DeclareConstantBuffers() { | 821 | void DeclareConstantBuffers() { |
| @@ -925,7 +1112,8 @@ private: | |||
| 925 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games | 1112 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games |
| 926 | // set an 0x80000000 index for those and the shader fails to build. Find out why | 1113 | // set an 0x80000000 index for those and the shader fails to build. Find out why |
| 927 | // this happens and what's its intent. | 1114 | // this happens and what's its intent. |
| 928 | return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint()); | 1115 | return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(), |
| 1116 | max_input_vertices.value()); | ||
| 929 | } | 1117 | } |
| 930 | return std::string(name); | 1118 | return std::string(name); |
| 931 | }; | 1119 | }; |
| @@ -980,7 +1168,7 @@ private: | |||
| 980 | return {"0", Type::Int}; | 1168 | return {"0", Type::Int}; |
| 981 | default: | 1169 | default: |
| 982 | if (IsGenericAttribute(attribute)) { | 1170 | if (IsGenericAttribute(attribute)) { |
| 983 | return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element), | 1171 | return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element), |
| 984 | Type::Float}; | 1172 | Type::Float}; |
| 985 | } | 1173 | } |
| 986 | break; | 1174 | break; |
| @@ -1049,8 +1237,7 @@ private: | |||
| 1049 | return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}}; | 1237 | return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}}; |
| 1050 | default: | 1238 | default: |
| 1051 | if (IsGenericAttribute(attribute)) { | 1239 | if (IsGenericAttribute(attribute)) { |
| 1052 | return { | 1240 | return {{GetGenericOutputAttribute(attribute, abuf->GetElement()), Type::Float}}; |
| 1053 | {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}}; | ||
| 1054 | } | 1241 | } |
| 1055 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); | 1242 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); |
| 1056 | return {}; | 1243 | return {}; |
| @@ -1945,7 +2132,7 @@ private: | |||
| 1945 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. | 2132 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. |
| 1946 | for (u32 component = 0; component < 4; ++component) { | 2133 | for (u32 component = 0; component < 4; ++component) { |
| 1947 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { | 2134 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { |
| 1948 | code.AddLine("FragColor{}[{}] = {};", render_target, component, | 2135 | code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component), |
| 1949 | SafeGetRegister(current_reg).AsFloat()); | 2136 | SafeGetRegister(current_reg).AsFloat()); |
| 1950 | ++current_reg; | 2137 | ++current_reg; |
| 1951 | } | 2138 | } |
| @@ -2261,27 +2448,34 @@ private: | |||
| 2261 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 2448 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 2262 | 2449 | ||
| 2263 | std::string GetRegister(u32 index) const { | 2450 | std::string GetRegister(u32 index) const { |
| 2264 | return GetDeclarationWithSuffix(index, "gpr"); | 2451 | return AppendSuffix(index, "gpr"); |
| 2265 | } | 2452 | } |
| 2266 | 2453 | ||
| 2267 | std::string GetCustomVariable(u32 index) const { | 2454 | std::string GetCustomVariable(u32 index) const { |
| 2268 | return GetDeclarationWithSuffix(index, "custom_var"); | 2455 | return AppendSuffix(index, "custom_var"); |
| 2269 | } | 2456 | } |
| 2270 | 2457 | ||
| 2271 | std::string GetPredicate(Tegra::Shader::Pred pred) const { | 2458 | std::string GetPredicate(Tegra::Shader::Pred pred) const { |
| 2272 | return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); | 2459 | return AppendSuffix(static_cast<u32>(pred), "pred"); |
| 2273 | } | 2460 | } |
| 2274 | 2461 | ||
| 2275 | std::string GetInputAttribute(Attribute::Index attribute) const { | 2462 | std::string GetGenericInputAttribute(Attribute::Index attribute) const { |
| 2276 | return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr"); | 2463 | return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME); |
| 2277 | } | 2464 | } |
| 2278 | 2465 | ||
| 2279 | std::string GetOutputAttribute(Attribute::Index attribute) const { | 2466 | std::unordered_map<u8, GenericVaryingDescription> varying_description; |
| 2280 | return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr"); | 2467 | |
| 2468 | std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const { | ||
| 2469 | const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element); | ||
| 2470 | const auto& description = varying_description.at(offset); | ||
| 2471 | if (description.is_scalar) { | ||
| 2472 | return description.name; | ||
| 2473 | } | ||
| 2474 | return fmt::format("{}[{}]", description.name, element - description.first_element); | ||
| 2281 | } | 2475 | } |
| 2282 | 2476 | ||
| 2283 | std::string GetConstBuffer(u32 index) const { | 2477 | std::string GetConstBuffer(u32 index) const { |
| 2284 | return GetDeclarationWithSuffix(index, "cbuf"); | 2478 | return AppendSuffix(index, "cbuf"); |
| 2285 | } | 2479 | } |
| 2286 | 2480 | ||
| 2287 | std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { | 2481 | std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { |
| @@ -2294,11 +2488,15 @@ private: | |||
| 2294 | } | 2488 | } |
| 2295 | 2489 | ||
| 2296 | std::string GetConstBufferBlock(u32 index) const { | 2490 | std::string GetConstBufferBlock(u32 index) const { |
| 2297 | return GetDeclarationWithSuffix(index, "cbuf_block"); | 2491 | return AppendSuffix(index, "cbuf_block"); |
| 2298 | } | 2492 | } |
| 2299 | 2493 | ||
| 2300 | std::string GetLocalMemory() const { | 2494 | std::string GetLocalMemory() const { |
| 2301 | return "lmem_" + suffix; | 2495 | if (suffix.empty()) { |
| 2496 | return "lmem"; | ||
| 2497 | } else { | ||
| 2498 | return "lmem_" + std::string{suffix}; | ||
| 2499 | } | ||
| 2302 | } | 2500 | } |
| 2303 | 2501 | ||
| 2304 | std::string GetInternalFlag(InternalFlag flag) const { | 2502 | std::string GetInternalFlag(InternalFlag flag) const { |
| @@ -2307,19 +2505,27 @@ private: | |||
| 2307 | const auto index = static_cast<u32>(flag); | 2505 | const auto index = static_cast<u32>(flag); |
| 2308 | ASSERT(index < static_cast<u32>(InternalFlag::Amount)); | 2506 | ASSERT(index < static_cast<u32>(InternalFlag::Amount)); |
| 2309 | 2507 | ||
| 2310 | return fmt::format("{}_{}", InternalFlagNames[index], suffix); | 2508 | if (suffix.empty()) { |
| 2509 | return InternalFlagNames[index]; | ||
| 2510 | } else { | ||
| 2511 | return fmt::format("{}_{}", InternalFlagNames[index], suffix); | ||
| 2512 | } | ||
| 2311 | } | 2513 | } |
| 2312 | 2514 | ||
| 2313 | std::string GetSampler(const Sampler& sampler) const { | 2515 | std::string GetSampler(const Sampler& sampler) const { |
| 2314 | return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); | 2516 | return AppendSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); |
| 2315 | } | 2517 | } |
| 2316 | 2518 | ||
| 2317 | std::string GetImage(const Image& image) const { | 2519 | std::string GetImage(const Image& image) const { |
| 2318 | return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image"); | 2520 | return AppendSuffix(static_cast<u32>(image.GetIndex()), "image"); |
| 2319 | } | 2521 | } |
| 2320 | 2522 | ||
| 2321 | std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { | 2523 | std::string AppendSuffix(u32 index, std::string_view name) const { |
| 2322 | return fmt::format("{}_{}_{}", name, index, suffix); | 2524 | if (suffix.empty()) { |
| 2525 | return fmt::format("{}{}", name, index); | ||
| 2526 | } else { | ||
| 2527 | return fmt::format("{}{}_{}", name, index, suffix); | ||
| 2528 | } | ||
| 2323 | } | 2529 | } |
| 2324 | 2530 | ||
| 2325 | u32 GetNumPhysicalInputAttributes() const { | 2531 | u32 GetNumPhysicalInputAttributes() const { |
| @@ -2334,17 +2540,31 @@ private: | |||
| 2334 | return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); | 2540 | return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); |
| 2335 | } | 2541 | } |
| 2336 | 2542 | ||
| 2543 | bool IsRenderTargetEnabled(u32 render_target) const { | ||
| 2544 | for (u32 component = 0; component < 4; ++component) { | ||
| 2545 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { | ||
| 2546 | return true; | ||
| 2547 | } | ||
| 2548 | } | ||
| 2549 | return false; | ||
| 2550 | } | ||
| 2551 | |||
| 2337 | const Device& device; | 2552 | const Device& device; |
| 2338 | const ShaderIR& ir; | 2553 | const ShaderIR& ir; |
| 2554 | const Registry& registry; | ||
| 2339 | const ShaderType stage; | 2555 | const ShaderType stage; |
| 2340 | const std::string suffix; | 2556 | const std::string_view identifier; |
| 2557 | const std::string_view suffix; | ||
| 2341 | const Header header; | 2558 | const Header header; |
| 2559 | std::unordered_map<u8, VaryingTFB> transform_feedback; | ||
| 2342 | 2560 | ||
| 2343 | ShaderWriter code; | 2561 | ShaderWriter code; |
| 2562 | |||
| 2563 | std::optional<u32> max_input_vertices; | ||
| 2344 | }; | 2564 | }; |
| 2345 | 2565 | ||
| 2346 | std::string GetFlowVariable(u32 i) { | 2566 | std::string GetFlowVariable(u32 index) { |
| 2347 | return fmt::format("flow_var_{}", i); | 2567 | return fmt::format("flow_var{}", index); |
| 2348 | } | 2568 | } |
| 2349 | 2569 | ||
| 2350 | class ExprDecompiler { | 2570 | class ExprDecompiler { |
| @@ -2531,7 +2751,7 @@ void GLSLDecompiler::DecompileAST() { | |||
| 2531 | 2751 | ||
| 2532 | } // Anonymous namespace | 2752 | } // Anonymous namespace |
| 2533 | 2753 | ||
| 2534 | ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { | 2754 | ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { |
| 2535 | ShaderEntries entries; | 2755 | ShaderEntries entries; |
| 2536 | for (const auto& cbuf : ir.GetConstantBuffers()) { | 2756 | for (const auto& cbuf : ir.GetConstantBuffers()) { |
| 2537 | entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), | 2757 | entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), |
| @@ -2555,28 +2775,12 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 2555 | return entries; | 2775 | return entries; |
| 2556 | } | 2776 | } |
| 2557 | 2777 | ||
| 2558 | std::string GetCommonDeclarations() { | 2778 | std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry, |
| 2559 | return R"(#define ftoi floatBitsToInt | 2779 | ShaderType stage, std::string_view identifier, |
| 2560 | #define ftou floatBitsToUint | 2780 | std::string_view suffix) { |
| 2561 | #define itof intBitsToFloat | 2781 | GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix); |
| 2562 | #define utof uintBitsToFloat | ||
| 2563 | |||
| 2564 | bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) { | ||
| 2565 | bvec2 is_nan1 = isnan(pair1); | ||
| 2566 | bvec2 is_nan2 = isnan(pair2); | ||
| 2567 | return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); | ||
| 2568 | } | ||
| 2569 | |||
| 2570 | const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); | ||
| 2571 | const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); | ||
| 2572 | )"; | ||
| 2573 | } | ||
| 2574 | |||
| 2575 | std::string Decompile(const Device& device, const ShaderIR& ir, ShaderType stage, | ||
| 2576 | const std::string& suffix) { | ||
| 2577 | GLSLDecompiler decompiler(device, ir, stage, suffix); | ||
| 2578 | decompiler.Decompile(); | 2782 | decompiler.Decompile(); |
| 2579 | return decompiler.GetResult(); | 2783 | return decompiler.GetResult(); |
| 2580 | } | 2784 | } |
| 2581 | 2785 | ||
| 2582 | } // namespace OpenGL::GLShader | 2786 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 0f692c1db..e7dbd810c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -6,22 +6,18 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <string> | 8 | #include <string> |
| 9 | #include <string_view> | ||
| 9 | #include <utility> | 10 | #include <utility> |
| 10 | #include <vector> | 11 | #include <vector> |
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/maxwell_3d.h" | 13 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/engines/shader_type.h" | 14 | #include "video_core/engines/shader_type.h" |
| 15 | #include "video_core/shader/registry.h" | ||
| 14 | #include "video_core/shader/shader_ir.h" | 16 | #include "video_core/shader/shader_ir.h" |
| 15 | 17 | ||
| 16 | namespace VideoCommon::Shader { | ||
| 17 | class ShaderIR; | ||
| 18 | } | ||
| 19 | |||
| 20 | namespace OpenGL { | 18 | namespace OpenGL { |
| 21 | class Device; | ||
| 22 | } | ||
| 23 | 19 | ||
| 24 | namespace OpenGL::GLShader { | 20 | class Device; |
| 25 | 21 | ||
| 26 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 22 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 27 | using SamplerEntry = VideoCommon::Shader::Sampler; | 23 | using SamplerEntry = VideoCommon::Shader::Sampler; |
| @@ -78,11 +74,11 @@ struct ShaderEntries { | |||
| 78 | std::size_t shader_length{}; | 74 | std::size_t shader_length{}; |
| 79 | }; | 75 | }; |
| 80 | 76 | ||
| 81 | ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir); | 77 | ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir); |
| 82 | |||
| 83 | std::string GetCommonDeclarations(); | ||
| 84 | 78 | ||
| 85 | std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | 79 | std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 86 | Tegra::Engines::ShaderType stage, const std::string& suffix); | 80 | const VideoCommon::Shader::Registry& registry, |
| 81 | Tegra::Engines::ShaderType stage, std::string_view identifier, | ||
| 82 | std::string_view suffix = {}); | ||
| 87 | 83 | ||
| 88 | } // namespace OpenGL::GLShader | 84 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 1fc204f6f..9e95a122b 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -31,32 +31,24 @@ namespace { | |||
| 31 | 31 | ||
| 32 | using ShaderCacheVersionHash = std::array<u8, 64>; | 32 | using ShaderCacheVersionHash = std::array<u8, 64>; |
| 33 | 33 | ||
| 34 | enum class TransferableEntryKind : u32 { | ||
| 35 | Raw, | ||
| 36 | Usage, | ||
| 37 | }; | ||
| 38 | |||
| 39 | struct ConstBufferKey { | 34 | struct ConstBufferKey { |
| 40 | u32 cbuf{}; | 35 | u32 cbuf = 0; |
| 41 | u32 offset{}; | 36 | u32 offset = 0; |
| 42 | u32 value{}; | 37 | u32 value = 0; |
| 43 | }; | 38 | }; |
| 44 | 39 | ||
| 45 | struct BoundSamplerKey { | 40 | struct BoundSamplerKey { |
| 46 | u32 offset{}; | 41 | u32 offset = 0; |
| 47 | Tegra::Engines::SamplerDescriptor sampler{}; | 42 | Tegra::Engines::SamplerDescriptor sampler; |
| 48 | }; | 43 | }; |
| 49 | 44 | ||
| 50 | struct BindlessSamplerKey { | 45 | struct BindlessSamplerKey { |
| 51 | u32 cbuf{}; | 46 | u32 cbuf = 0; |
| 52 | u32 offset{}; | 47 | u32 offset = 0; |
| 53 | Tegra::Engines::SamplerDescriptor sampler{}; | 48 | Tegra::Engines::SamplerDescriptor sampler; |
| 54 | }; | 49 | }; |
| 55 | 50 | ||
| 56 | constexpr u32 NativeVersion = 12; | 51 | constexpr u32 NativeVersion = 20; |
| 57 | |||
| 58 | // Making sure sizes doesn't change by accident | ||
| 59 | static_assert(sizeof(ProgramVariant) == 20); | ||
| 60 | 52 | ||
| 61 | ShaderCacheVersionHash GetShaderCacheVersionHash() { | 53 | ShaderCacheVersionHash GetShaderCacheVersionHash() { |
| 62 | ShaderCacheVersionHash hash{}; | 54 | ShaderCacheVersionHash hash{}; |
| @@ -67,61 +59,124 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { | |||
| 67 | 59 | ||
| 68 | } // Anonymous namespace | 60 | } // Anonymous namespace |
| 69 | 61 | ||
| 70 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ShaderType type, ProgramCode code, | 62 | ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default; |
| 71 | ProgramCode code_b) | ||
| 72 | : unique_identifier{unique_identifier}, type{type}, code{std::move(code)}, code_b{std::move( | ||
| 73 | code_b)} {} | ||
| 74 | 63 | ||
| 75 | ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; | 64 | ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default; |
| 76 | 65 | ||
| 77 | ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default; | 66 | bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { |
| 78 | 67 | if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) { | |
| 79 | bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) { | ||
| 80 | if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) || | ||
| 81 | file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) { | ||
| 82 | return false; | 68 | return false; |
| 83 | } | 69 | } |
| 84 | u32 code_size{}; | 70 | u32 code_size; |
| 85 | u32 code_size_b{}; | 71 | u32 code_size_b; |
| 86 | if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) || | 72 | if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) || |
| 87 | file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) { | 73 | file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) { |
| 88 | return false; | 74 | return false; |
| 89 | } | 75 | } |
| 90 | |||
| 91 | code.resize(code_size); | 76 | code.resize(code_size); |
| 92 | code_b.resize(code_size_b); | 77 | code_b.resize(code_size_b); |
| 93 | 78 | ||
| 94 | if (file.ReadArray(code.data(), code_size) != code_size) | 79 | if (file.ReadArray(code.data(), code_size) != code_size) { |
| 95 | return false; | 80 | return false; |
| 96 | 81 | } | |
| 97 | if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) { | 82 | if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) { |
| 98 | return false; | 83 | return false; |
| 99 | } | 84 | } |
| 85 | |||
| 86 | u8 is_texture_handler_size_known; | ||
| 87 | u32 texture_handler_size_value; | ||
| 88 | u32 num_keys; | ||
| 89 | u32 num_bound_samplers; | ||
| 90 | u32 num_bindless_samplers; | ||
| 91 | if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || | ||
| 92 | file.ReadArray(&is_texture_handler_size_known, 1) != 1 || | ||
| 93 | file.ReadArray(&texture_handler_size_value, 1) != 1 || | ||
| 94 | file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || | ||
| 95 | file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || | ||
| 96 | file.ReadArray(&num_bindless_samplers, 1) != 1) { | ||
| 97 | return false; | ||
| 98 | } | ||
| 99 | if (is_texture_handler_size_known) { | ||
| 100 | texture_handler_size = texture_handler_size_value; | ||
| 101 | } | ||
| 102 | |||
| 103 | std::vector<ConstBufferKey> flat_keys(num_keys); | ||
| 104 | std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers); | ||
| 105 | std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers); | ||
| 106 | if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() || | ||
| 107 | file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) != | ||
| 108 | flat_bound_samplers.size() || | ||
| 109 | file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) != | ||
| 110 | flat_bindless_samplers.size()) { | ||
| 111 | return false; | ||
| 112 | } | ||
| 113 | for (const auto& key : flat_keys) { | ||
| 114 | keys.insert({{key.cbuf, key.offset}, key.value}); | ||
| 115 | } | ||
| 116 | for (const auto& key : flat_bound_samplers) { | ||
| 117 | bound_samplers.emplace(key.offset, key.sampler); | ||
| 118 | } | ||
| 119 | for (const auto& key : flat_bindless_samplers) { | ||
| 120 | bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); | ||
| 121 | } | ||
| 122 | |||
| 100 | return true; | 123 | return true; |
| 101 | } | 124 | } |
| 102 | 125 | ||
| 103 | bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { | 126 | bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { |
| 104 | if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast<u32>(type)) != 1 || | 127 | if (file.WriteObject(static_cast<u32>(type)) != 1 || |
| 105 | file.WriteObject(static_cast<u32>(code.size())) != 1 || | 128 | file.WriteObject(static_cast<u32>(code.size())) != 1 || |
| 106 | file.WriteObject(static_cast<u32>(code_b.size())) != 1) { | 129 | file.WriteObject(static_cast<u32>(code_b.size())) != 1) { |
| 107 | return false; | 130 | return false; |
| 108 | } | 131 | } |
| 109 | 132 | if (file.WriteArray(code.data(), code.size()) != code.size()) { | |
| 110 | if (file.WriteArray(code.data(), code.size()) != code.size()) | ||
| 111 | return false; | 133 | return false; |
| 112 | 134 | } | |
| 113 | if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) { | 135 | if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) { |
| 114 | return false; | 136 | return false; |
| 115 | } | 137 | } |
| 116 | return true; | 138 | |
| 139 | if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 || | ||
| 140 | file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) != 1 || | ||
| 141 | file.WriteObject(texture_handler_size.value_or(0)) != 1 || | ||
| 142 | file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || | ||
| 143 | file.WriteObject(static_cast<u32>(keys.size())) != 1 || | ||
| 144 | file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || | ||
| 145 | file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { | ||
| 146 | return false; | ||
| 147 | } | ||
| 148 | |||
| 149 | std::vector<ConstBufferKey> flat_keys; | ||
| 150 | flat_keys.reserve(keys.size()); | ||
| 151 | for (const auto& [address, value] : keys) { | ||
| 152 | flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); | ||
| 153 | } | ||
| 154 | |||
| 155 | std::vector<BoundSamplerKey> flat_bound_samplers; | ||
| 156 | flat_bound_samplers.reserve(bound_samplers.size()); | ||
| 157 | for (const auto& [address, sampler] : bound_samplers) { | ||
| 158 | flat_bound_samplers.push_back(BoundSamplerKey{address, sampler}); | ||
| 159 | } | ||
| 160 | |||
| 161 | std::vector<BindlessSamplerKey> flat_bindless_samplers; | ||
| 162 | flat_bindless_samplers.reserve(bindless_samplers.size()); | ||
| 163 | for (const auto& [address, sampler] : bindless_samplers) { | ||
| 164 | flat_bindless_samplers.push_back( | ||
| 165 | BindlessSamplerKey{address.first, address.second, sampler}); | ||
| 166 | } | ||
| 167 | |||
| 168 | return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() && | ||
| 169 | file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) == | ||
| 170 | flat_bound_samplers.size() && | ||
| 171 | file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) == | ||
| 172 | flat_bindless_samplers.size(); | ||
| 117 | } | 173 | } |
| 118 | 174 | ||
| 119 | ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} | 175 | ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} |
| 120 | 176 | ||
| 121 | ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; | 177 | ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; |
| 122 | 178 | ||
| 123 | std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> | 179 | std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() { |
| 124 | ShaderDiskCacheOpenGL::LoadTransferable() { | ||
| 125 | // Skip games without title id | 180 | // Skip games without title id |
| 126 | const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0; | 181 | const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0; |
| 127 | if (!Settings::values.use_disk_shader_cache || !has_title_id) { | 182 | if (!Settings::values.use_disk_shader_cache || !has_title_id) { |
| @@ -130,17 +185,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||
| 130 | 185 | ||
| 131 | FileUtil::IOFile file(GetTransferablePath(), "rb"); | 186 | FileUtil::IOFile file(GetTransferablePath(), "rb"); |
| 132 | if (!file.IsOpen()) { | 187 | if (!file.IsOpen()) { |
| 133 | LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}", | 188 | LOG_INFO(Render_OpenGL, "No transferable shader cache found"); |
| 134 | GetTitleID()); | ||
| 135 | is_usable = true; | 189 | is_usable = true; |
| 136 | return {}; | 190 | return {}; |
| 137 | } | 191 | } |
| 138 | 192 | ||
| 139 | u32 version{}; | 193 | u32 version{}; |
| 140 | if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { | 194 | if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { |
| 141 | LOG_ERROR(Render_OpenGL, | 195 | LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); |
| 142 | "Failed to get transferable cache version for title id={}, skipping", | ||
| 143 | GetTitleID()); | ||
| 144 | return {}; | 196 | return {}; |
| 145 | } | 197 | } |
| 146 | 198 | ||
| @@ -158,105 +210,42 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||
| 158 | } | 210 | } |
| 159 | 211 | ||
| 160 | // Version is valid, load the shaders | 212 | // Version is valid, load the shaders |
| 161 | constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping"; | 213 | std::vector<ShaderDiskCacheEntry> entries; |
| 162 | std::vector<ShaderDiskCacheRaw> raws; | ||
| 163 | std::vector<ShaderDiskCacheUsage> usages; | ||
| 164 | while (file.Tell() < file.GetSize()) { | 214 | while (file.Tell() < file.GetSize()) { |
| 165 | TransferableEntryKind kind{}; | 215 | ShaderDiskCacheEntry& entry = entries.emplace_back(); |
| 166 | if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { | 216 | if (!entry.Load(file)) { |
| 167 | LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping"); | 217 | LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); |
| 168 | return {}; | ||
| 169 | } | ||
| 170 | |||
| 171 | switch (kind) { | ||
| 172 | case TransferableEntryKind::Raw: { | ||
| 173 | ShaderDiskCacheRaw entry; | ||
| 174 | if (!entry.Load(file)) { | ||
| 175 | LOG_ERROR(Render_OpenGL, error_loading); | ||
| 176 | return {}; | ||
| 177 | } | ||
| 178 | transferable.insert({entry.GetUniqueIdentifier(), {}}); | ||
| 179 | raws.push_back(std::move(entry)); | ||
| 180 | break; | ||
| 181 | } | ||
| 182 | case TransferableEntryKind::Usage: { | ||
| 183 | ShaderDiskCacheUsage usage; | ||
| 184 | |||
| 185 | u32 num_keys{}; | ||
| 186 | u32 num_bound_samplers{}; | ||
| 187 | u32 num_bindless_samplers{}; | ||
| 188 | if (file.ReadArray(&usage.unique_identifier, 1) != 1 || | ||
| 189 | file.ReadArray(&usage.variant, 1) != 1 || | ||
| 190 | file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || | ||
| 191 | file.ReadArray(&num_bound_samplers, 1) != 1 || | ||
| 192 | file.ReadArray(&num_bindless_samplers, 1) != 1) { | ||
| 193 | LOG_ERROR(Render_OpenGL, error_loading); | ||
| 194 | return {}; | ||
| 195 | } | ||
| 196 | |||
| 197 | std::vector<ConstBufferKey> keys(num_keys); | ||
| 198 | std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers); | ||
| 199 | std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers); | ||
| 200 | if (file.ReadArray(keys.data(), keys.size()) != keys.size() || | ||
| 201 | file.ReadArray(bound_samplers.data(), bound_samplers.size()) != | ||
| 202 | bound_samplers.size() || | ||
| 203 | file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) != | ||
| 204 | bindless_samplers.size()) { | ||
| 205 | LOG_ERROR(Render_OpenGL, error_loading); | ||
| 206 | return {}; | ||
| 207 | } | ||
| 208 | for (const auto& key : keys) { | ||
| 209 | usage.keys.insert({{key.cbuf, key.offset}, key.value}); | ||
| 210 | } | ||
| 211 | for (const auto& key : bound_samplers) { | ||
| 212 | usage.bound_samplers.emplace(key.offset, key.sampler); | ||
| 213 | } | ||
| 214 | for (const auto& key : bindless_samplers) { | ||
| 215 | usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); | ||
| 216 | } | ||
| 217 | |||
| 218 | usages.push_back(std::move(usage)); | ||
| 219 | break; | ||
| 220 | } | ||
| 221 | default: | ||
| 222 | LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping", | ||
| 223 | static_cast<u32>(kind)); | ||
| 224 | return {}; | 218 | return {}; |
| 225 | } | 219 | } |
| 226 | } | 220 | } |
| 227 | 221 | ||
| 228 | is_usable = true; | 222 | is_usable = true; |
| 229 | return {{std::move(raws), std::move(usages)}}; | 223 | return {std::move(entries)}; |
| 230 | } | 224 | } |
| 231 | 225 | ||
| 232 | std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> | 226 | std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() { |
| 233 | ShaderDiskCacheOpenGL::LoadPrecompiled() { | ||
| 234 | if (!is_usable) { | 227 | if (!is_usable) { |
| 235 | return {}; | 228 | return {}; |
| 236 | } | 229 | } |
| 237 | 230 | ||
| 238 | std::string path = GetPrecompiledPath(); | 231 | FileUtil::IOFile file(GetPrecompiledPath(), "rb"); |
| 239 | FileUtil::IOFile file(path, "rb"); | ||
| 240 | if (!file.IsOpen()) { | 232 | if (!file.IsOpen()) { |
| 241 | LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", | 233 | LOG_INFO(Render_OpenGL, "No precompiled shader cache found"); |
| 242 | GetTitleID()); | ||
| 243 | return {}; | 234 | return {}; |
| 244 | } | 235 | } |
| 245 | 236 | ||
| 246 | const auto result = LoadPrecompiledFile(file); | 237 | if (const auto result = LoadPrecompiledFile(file)) { |
| 247 | if (!result) { | 238 | return *result; |
| 248 | LOG_INFO(Render_OpenGL, | ||
| 249 | "Failed to load precompiled cache for game with title id={}, removing", | ||
| 250 | GetTitleID()); | ||
| 251 | file.Close(); | ||
| 252 | InvalidatePrecompiled(); | ||
| 253 | return {}; | ||
| 254 | } | 239 | } |
| 255 | return *result; | 240 | |
| 241 | LOG_INFO(Render_OpenGL, "Failed to load precompiled cache"); | ||
| 242 | file.Close(); | ||
| 243 | InvalidatePrecompiled(); | ||
| 244 | return {}; | ||
| 256 | } | 245 | } |
| 257 | 246 | ||
| 258 | std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> | 247 | std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile( |
| 259 | ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | 248 | FileUtil::IOFile& file) { |
| 260 | // Read compressed file from disk and decompress to virtual precompiled cache file | 249 | // Read compressed file from disk and decompress to virtual precompiled cache file |
| 261 | std::vector<u8> compressed(file.GetSize()); | 250 | std::vector<u8> compressed(file.GetSize()); |
| 262 | file.ReadBytes(compressed.data(), compressed.size()); | 251 | file.ReadBytes(compressed.data(), compressed.size()); |
| @@ -275,58 +264,22 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | |||
| 275 | return {}; | 264 | return {}; |
| 276 | } | 265 | } |
| 277 | 266 | ||
| 278 | ShaderDumpsMap dumps; | 267 | std::vector<ShaderDiskCachePrecompiled> entries; |
| 279 | while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { | 268 | while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { |
| 280 | u32 num_keys{}; | 269 | u32 binary_size; |
| 281 | u32 num_bound_samplers{}; | 270 | auto& entry = entries.emplace_back(); |
| 282 | u32 num_bindless_samplers{}; | 271 | if (!LoadObjectFromPrecompiled(entry.unique_identifier) || |
| 283 | ShaderDiskCacheUsage usage; | 272 | !LoadObjectFromPrecompiled(entry.binary_format) || |
| 284 | if (!LoadObjectFromPrecompiled(usage.unique_identifier) || | 273 | !LoadObjectFromPrecompiled(binary_size)) { |
| 285 | !LoadObjectFromPrecompiled(usage.variant) || | ||
| 286 | !LoadObjectFromPrecompiled(usage.bound_buffer) || | ||
| 287 | !LoadObjectFromPrecompiled(num_keys) || | ||
| 288 | !LoadObjectFromPrecompiled(num_bound_samplers) || | ||
| 289 | !LoadObjectFromPrecompiled(num_bindless_samplers)) { | ||
| 290 | return {}; | ||
| 291 | } | ||
| 292 | std::vector<ConstBufferKey> keys(num_keys); | ||
| 293 | std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers); | ||
| 294 | std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers); | ||
| 295 | if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) || | ||
| 296 | !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) != | ||
| 297 | bound_samplers.size() || | ||
| 298 | !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) != | ||
| 299 | bindless_samplers.size()) { | ||
| 300 | return {}; | ||
| 301 | } | ||
| 302 | for (const auto& key : keys) { | ||
| 303 | usage.keys.insert({{key.cbuf, key.offset}, key.value}); | ||
| 304 | } | ||
| 305 | for (const auto& key : bound_samplers) { | ||
| 306 | usage.bound_samplers.emplace(key.offset, key.sampler); | ||
| 307 | } | ||
| 308 | for (const auto& key : bindless_samplers) { | ||
| 309 | usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); | ||
| 310 | } | ||
| 311 | |||
| 312 | ShaderDiskCacheDump dump; | ||
| 313 | if (!LoadObjectFromPrecompiled(dump.binary_format)) { | ||
| 314 | return {}; | ||
| 315 | } | ||
| 316 | |||
| 317 | u32 binary_length{}; | ||
| 318 | if (!LoadObjectFromPrecompiled(binary_length)) { | ||
| 319 | return {}; | 274 | return {}; |
| 320 | } | 275 | } |
| 321 | 276 | ||
| 322 | dump.binary.resize(binary_length); | 277 | entry.binary.resize(binary_size); |
| 323 | if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { | 278 | if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { |
| 324 | return {}; | 279 | return {}; |
| 325 | } | 280 | } |
| 326 | |||
| 327 | dumps.emplace(std::move(usage), dump); | ||
| 328 | } | 281 | } |
| 329 | return dumps; | 282 | return entries; |
| 330 | } | 283 | } |
| 331 | 284 | ||
| 332 | void ShaderDiskCacheOpenGL::InvalidateTransferable() { | 285 | void ShaderDiskCacheOpenGL::InvalidateTransferable() { |
| @@ -346,13 +299,13 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { | |||
| 346 | } | 299 | } |
| 347 | } | 300 | } |
| 348 | 301 | ||
| 349 | void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { | 302 | void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { |
| 350 | if (!is_usable) { | 303 | if (!is_usable) { |
| 351 | return; | 304 | return; |
| 352 | } | 305 | } |
| 353 | 306 | ||
| 354 | const u64 id = entry.GetUniqueIdentifier(); | 307 | const u64 id = entry.unique_identifier; |
| 355 | if (transferable.find(id) != transferable.end()) { | 308 | if (stored_transferable.find(id) != stored_transferable.end()) { |
| 356 | // The shader already exists | 309 | // The shader already exists |
| 357 | return; | 310 | return; |
| 358 | } | 311 | } |
| @@ -361,71 +314,17 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { | |||
| 361 | if (!file.IsOpen()) { | 314 | if (!file.IsOpen()) { |
| 362 | return; | 315 | return; |
| 363 | } | 316 | } |
| 364 | if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) { | 317 | if (!entry.Save(file)) { |
| 365 | LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); | 318 | LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); |
| 366 | file.Close(); | 319 | file.Close(); |
| 367 | InvalidateTransferable(); | 320 | InvalidateTransferable(); |
| 368 | return; | 321 | return; |
| 369 | } | 322 | } |
| 370 | transferable.insert({id, {}}); | ||
| 371 | } | ||
| 372 | 323 | ||
| 373 | void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { | 324 | stored_transferable.insert(id); |
| 374 | if (!is_usable) { | ||
| 375 | return; | ||
| 376 | } | ||
| 377 | |||
| 378 | const auto it = transferable.find(usage.unique_identifier); | ||
| 379 | ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously"); | ||
| 380 | |||
| 381 | auto& usages{it->second}; | ||
| 382 | if (usages.find(usage) != usages.end()) { | ||
| 383 | // Skip this variant since the shader is already stored. | ||
| 384 | return; | ||
| 385 | } | ||
| 386 | usages.insert(usage); | ||
| 387 | |||
| 388 | FileUtil::IOFile file = AppendTransferableFile(); | ||
| 389 | if (!file.IsOpen()) | ||
| 390 | return; | ||
| 391 | const auto Close = [&] { | ||
| 392 | LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing"); | ||
| 393 | file.Close(); | ||
| 394 | InvalidateTransferable(); | ||
| 395 | }; | ||
| 396 | |||
| 397 | if (file.WriteObject(TransferableEntryKind::Usage) != 1 || | ||
| 398 | file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || | ||
| 399 | file.WriteObject(usage.bound_buffer) != 1 || | ||
| 400 | file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 || | ||
| 401 | file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 || | ||
| 402 | file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) { | ||
| 403 | Close(); | ||
| 404 | return; | ||
| 405 | } | ||
| 406 | for (const auto& [pair, value] : usage.keys) { | ||
| 407 | const auto [cbuf, offset] = pair; | ||
| 408 | if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) { | ||
| 409 | Close(); | ||
| 410 | return; | ||
| 411 | } | ||
| 412 | } | ||
| 413 | for (const auto& [offset, sampler] : usage.bound_samplers) { | ||
| 414 | if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) { | ||
| 415 | Close(); | ||
| 416 | return; | ||
| 417 | } | ||
| 418 | } | ||
| 419 | for (const auto& [pair, sampler] : usage.bindless_samplers) { | ||
| 420 | const auto [cbuf, offset] = pair; | ||
| 421 | if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { | ||
| 422 | Close(); | ||
| 423 | return; | ||
| 424 | } | ||
| 425 | } | ||
| 426 | } | 325 | } |
| 427 | 326 | ||
| 428 | void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) { | 327 | void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) { |
| 429 | if (!is_usable) { | 328 | if (!is_usable) { |
| 430 | return; | 329 | return; |
| 431 | } | 330 | } |
| @@ -437,51 +336,19 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p | |||
| 437 | SavePrecompiledHeaderToVirtualPrecompiledCache(); | 336 | SavePrecompiledHeaderToVirtualPrecompiledCache(); |
| 438 | } | 337 | } |
| 439 | 338 | ||
| 440 | GLint binary_length{}; | 339 | GLint binary_length; |
| 441 | glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); | 340 | glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); |
| 442 | 341 | ||
| 443 | GLenum binary_format{}; | 342 | GLenum binary_format; |
| 444 | std::vector<u8> binary(binary_length); | 343 | std::vector<u8> binary(binary_length); |
| 445 | glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); | 344 | glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); |
| 446 | 345 | ||
| 447 | const auto Close = [&] { | 346 | if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) || |
| 347 | !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) || | ||
| 348 | !SaveArrayToPrecompiled(binary.data(), binary.size())) { | ||
| 448 | LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", | 349 | LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", |
| 449 | usage.unique_identifier); | 350 | unique_identifier); |
| 450 | InvalidatePrecompiled(); | 351 | InvalidatePrecompiled(); |
| 451 | }; | ||
| 452 | |||
| 453 | if (!SaveObjectToPrecompiled(usage.unique_identifier) || | ||
| 454 | !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) || | ||
| 455 | !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) || | ||
| 456 | !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) || | ||
| 457 | !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) { | ||
| 458 | Close(); | ||
| 459 | return; | ||
| 460 | } | ||
| 461 | for (const auto& [pair, value] : usage.keys) { | ||
| 462 | const auto [cbuf, offset] = pair; | ||
| 463 | if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) { | ||
| 464 | Close(); | ||
| 465 | return; | ||
| 466 | } | ||
| 467 | } | ||
| 468 | for (const auto& [offset, sampler] : usage.bound_samplers) { | ||
| 469 | if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) { | ||
| 470 | Close(); | ||
| 471 | return; | ||
| 472 | } | ||
| 473 | } | ||
| 474 | for (const auto& [pair, sampler] : usage.bindless_samplers) { | ||
| 475 | const auto [cbuf, offset] = pair; | ||
| 476 | if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { | ||
| 477 | Close(); | ||
| 478 | return; | ||
| 479 | } | ||
| 480 | } | ||
| 481 | if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) || | ||
| 482 | !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) || | ||
| 483 | !SaveArrayToPrecompiled(binary.data(), binary.size())) { | ||
| 484 | Close(); | ||
| 485 | } | 352 | } |
| 486 | } | 353 | } |
| 487 | 354 | ||
| @@ -534,7 +401,6 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { | |||
| 534 | if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) { | 401 | if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) { |
| 535 | LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", | 402 | LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", |
| 536 | precompiled_path); | 403 | precompiled_path); |
| 537 | return; | ||
| 538 | } | 404 | } |
| 539 | } | 405 | } |
| 540 | 406 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index ef2371f6d..d5be52e40 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -19,8 +19,7 @@ | |||
| 19 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 20 | #include "core/file_sys/vfs_vector.h" | 20 | #include "core/file_sys/vfs_vector.h" |
| 21 | #include "video_core/engines/shader_type.h" | 21 | #include "video_core/engines/shader_type.h" |
| 22 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 22 | #include "video_core/shader/registry.h" |
| 23 | #include "video_core/shader/const_buffer_locker.h" | ||
| 24 | 23 | ||
| 25 | namespace Core { | 24 | namespace Core { |
| 26 | class System; | 25 | class System; |
| @@ -32,139 +31,39 @@ class IOFile; | |||
| 32 | 31 | ||
| 33 | namespace OpenGL { | 32 | namespace OpenGL { |
| 34 | 33 | ||
| 35 | struct ShaderDiskCacheUsage; | ||
| 36 | struct ShaderDiskCacheDump; | ||
| 37 | |||
| 38 | using ProgramCode = std::vector<u64>; | 34 | using ProgramCode = std::vector<u64>; |
| 39 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; | ||
| 40 | |||
| 41 | /// Describes the different variants a program can be compiled with. | ||
| 42 | struct ProgramVariant final { | ||
| 43 | ProgramVariant() = default; | ||
| 44 | |||
| 45 | /// Graphics constructor. | ||
| 46 | explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept | ||
| 47 | : primitive_mode{primitive_mode} {} | ||
| 48 | |||
| 49 | /// Compute constructor. | ||
| 50 | explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size, | ||
| 51 | u32 local_memory_size) noexcept | ||
| 52 | : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)}, | ||
| 53 | shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {} | ||
| 54 | |||
| 55 | // Graphics specific parameters. | ||
| 56 | GLenum primitive_mode{}; | ||
| 57 | |||
| 58 | // Compute specific parameters. | ||
| 59 | u32 block_x{}; | ||
| 60 | u16 block_y{}; | ||
| 61 | u16 block_z{}; | ||
| 62 | u32 shared_memory_size{}; | ||
| 63 | u32 local_memory_size{}; | ||
| 64 | |||
| 65 | bool operator==(const ProgramVariant& rhs) const noexcept { | ||
| 66 | return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size, | ||
| 67 | local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y, | ||
| 68 | rhs.block_z, rhs.shared_memory_size, | ||
| 69 | rhs.local_memory_size); | ||
| 70 | } | ||
| 71 | |||
| 72 | bool operator!=(const ProgramVariant& rhs) const noexcept { | ||
| 73 | return !operator==(rhs); | ||
| 74 | } | ||
| 75 | }; | ||
| 76 | static_assert(std::is_trivially_copyable_v<ProgramVariant>); | ||
| 77 | |||
| 78 | /// Describes how a shader is used. | ||
| 79 | struct ShaderDiskCacheUsage { | ||
| 80 | u64 unique_identifier{}; | ||
| 81 | ProgramVariant variant; | ||
| 82 | u32 bound_buffer{}; | ||
| 83 | VideoCommon::Shader::KeyMap keys; | ||
| 84 | VideoCommon::Shader::BoundSamplerMap bound_samplers; | ||
| 85 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | ||
| 86 | |||
| 87 | bool operator==(const ShaderDiskCacheUsage& rhs) const { | ||
| 88 | return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) == | ||
| 89 | std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers, | ||
| 90 | rhs.bindless_samplers); | ||
| 91 | } | ||
| 92 | |||
| 93 | bool operator!=(const ShaderDiskCacheUsage& rhs) const { | ||
| 94 | return !operator==(rhs); | ||
| 95 | } | ||
| 96 | }; | ||
| 97 | |||
| 98 | } // namespace OpenGL | ||
| 99 | |||
| 100 | namespace std { | ||
| 101 | |||
| 102 | template <> | ||
| 103 | struct hash<OpenGL::ProgramVariant> { | ||
| 104 | std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { | ||
| 105 | return (static_cast<std::size_t>(variant.primitive_mode) << 6) ^ | ||
| 106 | static_cast<std::size_t>(variant.block_x) ^ | ||
| 107 | (static_cast<std::size_t>(variant.block_y) << 32) ^ | ||
| 108 | (static_cast<std::size_t>(variant.block_z) << 48) ^ | ||
| 109 | (static_cast<std::size_t>(variant.shared_memory_size) << 16) ^ | ||
| 110 | (static_cast<std::size_t>(variant.local_memory_size) << 36); | ||
| 111 | } | ||
| 112 | }; | ||
| 113 | |||
| 114 | template <> | ||
| 115 | struct hash<OpenGL::ShaderDiskCacheUsage> { | ||
| 116 | std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { | ||
| 117 | return static_cast<std::size_t>(usage.unique_identifier) ^ | ||
| 118 | std::hash<OpenGL::ProgramVariant>{}(usage.variant); | ||
| 119 | } | ||
| 120 | }; | ||
| 121 | |||
| 122 | } // namespace std | ||
| 123 | |||
| 124 | namespace OpenGL { | ||
| 125 | 35 | ||
| 126 | /// Describes a shader how it's used by the guest GPU | 36 | /// Describes a shader and how it's used by the guest GPU |
| 127 | class ShaderDiskCacheRaw { | 37 | struct ShaderDiskCacheEntry { |
| 128 | public: | 38 | ShaderDiskCacheEntry(); |
| 129 | explicit ShaderDiskCacheRaw(u64 unique_identifier, Tegra::Engines::ShaderType type, | 39 | ~ShaderDiskCacheEntry(); |
| 130 | ProgramCode code, ProgramCode code_b = {}); | ||
| 131 | ShaderDiskCacheRaw(); | ||
| 132 | ~ShaderDiskCacheRaw(); | ||
| 133 | 40 | ||
| 134 | bool Load(FileUtil::IOFile& file); | 41 | bool Load(FileUtil::IOFile& file); |
| 135 | 42 | ||
| 136 | bool Save(FileUtil::IOFile& file) const; | 43 | bool Save(FileUtil::IOFile& file) const; |
| 137 | 44 | ||
| 138 | u64 GetUniqueIdentifier() const { | ||
| 139 | return unique_identifier; | ||
| 140 | } | ||
| 141 | |||
| 142 | bool HasProgramA() const { | 45 | bool HasProgramA() const { |
| 143 | return !code.empty() && !code_b.empty(); | 46 | return !code.empty() && !code_b.empty(); |
| 144 | } | 47 | } |
| 145 | 48 | ||
| 146 | Tegra::Engines::ShaderType GetType() const { | ||
| 147 | return type; | ||
| 148 | } | ||
| 149 | |||
| 150 | const ProgramCode& GetCode() const { | ||
| 151 | return code; | ||
| 152 | } | ||
| 153 | |||
| 154 | const ProgramCode& GetCodeB() const { | ||
| 155 | return code_b; | ||
| 156 | } | ||
| 157 | |||
| 158 | private: | ||
| 159 | u64 unique_identifier{}; | ||
| 160 | Tegra::Engines::ShaderType type{}; | 49 | Tegra::Engines::ShaderType type{}; |
| 161 | ProgramCode code; | 50 | ProgramCode code; |
| 162 | ProgramCode code_b; | 51 | ProgramCode code_b; |
| 52 | |||
| 53 | u64 unique_identifier = 0; | ||
| 54 | std::optional<u32> texture_handler_size; | ||
| 55 | u32 bound_buffer = 0; | ||
| 56 | VideoCommon::Shader::GraphicsInfo graphics_info; | ||
| 57 | VideoCommon::Shader::ComputeInfo compute_info; | ||
| 58 | VideoCommon::Shader::KeyMap keys; | ||
| 59 | VideoCommon::Shader::BoundSamplerMap bound_samplers; | ||
| 60 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | ||
| 163 | }; | 61 | }; |
| 164 | 62 | ||
| 165 | /// Contains an OpenGL dumped binary program | 63 | /// Contains an OpenGL dumped binary program |
| 166 | struct ShaderDiskCacheDump { | 64 | struct ShaderDiskCachePrecompiled { |
| 167 | GLenum binary_format{}; | 65 | u64 unique_identifier = 0; |
| 66 | GLenum binary_format = 0; | ||
| 168 | std::vector<u8> binary; | 67 | std::vector<u8> binary; |
| 169 | }; | 68 | }; |
| 170 | 69 | ||
| @@ -174,11 +73,10 @@ public: | |||
| 174 | ~ShaderDiskCacheOpenGL(); | 73 | ~ShaderDiskCacheOpenGL(); |
| 175 | 74 | ||
| 176 | /// Loads transferable cache. If file has a old version or on failure, it deletes the file. | 75 | /// Loads transferable cache. If file has a old version or on failure, it deletes the file. |
| 177 | std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> | 76 | std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable(); |
| 178 | LoadTransferable(); | ||
| 179 | 77 | ||
| 180 | /// Loads current game's precompiled cache. Invalidates on failure. | 78 | /// Loads current game's precompiled cache. Invalidates on failure. |
| 181 | std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled(); | 79 | std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled(); |
| 182 | 80 | ||
| 183 | /// Removes the transferable (and precompiled) cache file. | 81 | /// Removes the transferable (and precompiled) cache file. |
| 184 | void InvalidateTransferable(); | 82 | void InvalidateTransferable(); |
| @@ -187,21 +85,18 @@ public: | |||
| 187 | void InvalidatePrecompiled(); | 85 | void InvalidatePrecompiled(); |
| 188 | 86 | ||
| 189 | /// Saves a raw dump to the transferable file. Checks for collisions. | 87 | /// Saves a raw dump to the transferable file. Checks for collisions. |
| 190 | void SaveRaw(const ShaderDiskCacheRaw& entry); | 88 | void SaveEntry(const ShaderDiskCacheEntry& entry); |
| 191 | |||
| 192 | /// Saves shader usage to the transferable file. Does not check for collisions. | ||
| 193 | void SaveUsage(const ShaderDiskCacheUsage& usage); | ||
| 194 | 89 | ||
| 195 | /// Saves a dump entry to the precompiled file. Does not check for collisions. | 90 | /// Saves a dump entry to the precompiled file. Does not check for collisions. |
| 196 | void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); | 91 | void SavePrecompiled(u64 unique_identifier, GLuint program); |
| 197 | 92 | ||
| 198 | /// Serializes virtual precompiled shader cache file to real file | 93 | /// Serializes virtual precompiled shader cache file to real file |
| 199 | void SaveVirtualPrecompiledFile(); | 94 | void SaveVirtualPrecompiledFile(); |
| 200 | 95 | ||
| 201 | private: | 96 | private: |
| 202 | /// Loads the transferable cache. Returns empty on failure. | 97 | /// Loads the transferable cache. Returns empty on failure. |
| 203 | std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> | 98 | std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile( |
| 204 | LoadPrecompiledFile(FileUtil::IOFile& file); | 99 | FileUtil::IOFile& file); |
| 205 | 100 | ||
| 206 | /// Opens current game's transferable file and write it's header if it doesn't exist | 101 | /// Opens current game's transferable file and write it's header if it doesn't exist |
| 207 | FileUtil::IOFile AppendTransferableFile() const; | 102 | FileUtil::IOFile AppendTransferableFile() const; |
| @@ -270,7 +165,7 @@ private: | |||
| 270 | std::size_t precompiled_cache_virtual_file_offset = 0; | 165 | std::size_t precompiled_cache_virtual_file_offset = 0; |
| 271 | 166 | ||
| 272 | // Stored transferable shaders | 167 | // Stored transferable shaders |
| 273 | std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable; | 168 | std::unordered_set<u64> stored_transferable; |
| 274 | 169 | ||
| 275 | // The cache has been loaded at boot | 170 | // The cache has been loaded at boot |
| 276 | bool is_usable{}; | 171 | bool is_usable{}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp deleted file mode 100644 index 34946fb47..000000000 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ /dev/null | |||
| @@ -1,109 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string> | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 10 | #include "video_core/engines/shader_type.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||
| 14 | #include "video_core/shader/shader_ir.h" | ||
| 15 | |||
| 16 | namespace OpenGL::GLShader { | ||
| 17 | |||
| 18 | using Tegra::Engines::Maxwell3D; | ||
| 19 | using Tegra::Engines::ShaderType; | ||
| 20 | using VideoCommon::Shader::CompileDepth; | ||
| 21 | using VideoCommon::Shader::CompilerSettings; | ||
| 22 | using VideoCommon::Shader::ProgramCode; | ||
| 23 | using VideoCommon::Shader::ShaderIR; | ||
| 24 | |||
| 25 | std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { | ||
| 26 | std::string out = GetCommonDeclarations(); | ||
| 27 | out += fmt::format(R"( | ||
| 28 | layout (std140, binding = {}) uniform vs_config {{ | ||
| 29 | float y_direction; | ||
| 30 | }}; | ||
| 31 | |||
| 32 | )", | ||
| 33 | EmulationUniformBlockBinding); | ||
| 34 | out += Decompile(device, ir, ShaderType::Vertex, "vertex"); | ||
| 35 | if (ir_b) { | ||
| 36 | out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b"); | ||
| 37 | } | ||
| 38 | |||
| 39 | out += R"( | ||
| 40 | void main() { | ||
| 41 | gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f); | ||
| 42 | execute_vertex(); | ||
| 43 | )"; | ||
| 44 | if (ir_b) { | ||
| 45 | out += " execute_vertex_b();"; | ||
| 46 | } | ||
| 47 | out += "}\n"; | ||
| 48 | return out; | ||
| 49 | } | ||
| 50 | |||
| 51 | std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { | ||
| 52 | std::string out = GetCommonDeclarations(); | ||
| 53 | out += fmt::format(R"( | ||
| 54 | layout (std140, binding = {}) uniform gs_config {{ | ||
| 55 | float y_direction; | ||
| 56 | }}; | ||
| 57 | |||
| 58 | )", | ||
| 59 | EmulationUniformBlockBinding); | ||
| 60 | out += Decompile(device, ir, ShaderType::Geometry, "geometry"); | ||
| 61 | |||
| 62 | out += R"( | ||
| 63 | void main() { | ||
| 64 | execute_geometry(); | ||
| 65 | } | ||
| 66 | )"; | ||
| 67 | return out; | ||
| 68 | } | ||
| 69 | |||
| 70 | std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { | ||
| 71 | std::string out = GetCommonDeclarations(); | ||
| 72 | out += fmt::format(R"( | ||
| 73 | layout (location = 0) out vec4 FragColor0; | ||
| 74 | layout (location = 1) out vec4 FragColor1; | ||
| 75 | layout (location = 2) out vec4 FragColor2; | ||
| 76 | layout (location = 3) out vec4 FragColor3; | ||
| 77 | layout (location = 4) out vec4 FragColor4; | ||
| 78 | layout (location = 5) out vec4 FragColor5; | ||
| 79 | layout (location = 6) out vec4 FragColor6; | ||
| 80 | layout (location = 7) out vec4 FragColor7; | ||
| 81 | |||
| 82 | layout (std140, binding = {}) uniform fs_config {{ | ||
| 83 | float y_direction; | ||
| 84 | }}; | ||
| 85 | |||
| 86 | )", | ||
| 87 | EmulationUniformBlockBinding); | ||
| 88 | out += Decompile(device, ir, ShaderType::Fragment, "fragment"); | ||
| 89 | |||
| 90 | out += R"( | ||
| 91 | void main() { | ||
| 92 | execute_fragment(); | ||
| 93 | } | ||
| 94 | )"; | ||
| 95 | return out; | ||
| 96 | } | ||
| 97 | |||
| 98 | std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) { | ||
| 99 | std::string out = GetCommonDeclarations(); | ||
| 100 | out += Decompile(device, ir, ShaderType::Compute, "compute"); | ||
| 101 | out += R"( | ||
| 102 | void main() { | ||
| 103 | execute_compute(); | ||
| 104 | } | ||
| 105 | )"; | ||
| 106 | return out; | ||
| 107 | } | ||
| 108 | |||
| 109 | } // namespace OpenGL::GLShader | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h deleted file mode 100644 index cba2be9f9..000000000 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ /dev/null | |||
| @@ -1,34 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 11 | #include "video_core/shader/shader_ir.h" | ||
| 12 | |||
| 13 | namespace OpenGL { | ||
| 14 | class Device; | ||
| 15 | } | ||
| 16 | |||
| 17 | namespace OpenGL::GLShader { | ||
| 18 | |||
| 19 | using VideoCommon::Shader::ProgramCode; | ||
| 20 | using VideoCommon::Shader::ShaderIR; | ||
| 21 | |||
| 22 | /// Generates the GLSL vertex shader program source code for the given VS program | ||
| 23 | std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b); | ||
| 24 | |||
| 25 | /// Generates the GLSL geometry shader program source code for the given GS program | ||
| 26 | std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir); | ||
| 27 | |||
| 28 | /// Generates the GLSL fragment shader program source code for the given FS program | ||
| 29 | std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir); | ||
| 30 | |||
| 31 | /// Generates the GLSL compute shader program source code for the given CS program | ||
| 32 | std::string GenerateComputeShader(const Device& device, const ShaderIR& ir); | ||
| 33 | |||
| 34 | } // namespace OpenGL::GLShader | ||
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 886bde3b9..3847bd722 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -107,8 +107,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan | |||
| 107 | features.occlusionQueryPrecise = true; | 107 | features.occlusionQueryPrecise = true; |
| 108 | features.fragmentStoresAndAtomics = true; | 108 | features.fragmentStoresAndAtomics = true; |
| 109 | features.shaderImageGatherExtended = true; | 109 | features.shaderImageGatherExtended = true; |
| 110 | features.shaderStorageImageReadWithoutFormat = | 110 | features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported; |
| 111 | is_shader_storage_img_read_without_format_supported; | ||
| 112 | features.shaderStorageImageWriteWithoutFormat = true; | 111 | features.shaderStorageImageWriteWithoutFormat = true; |
| 113 | features.textureCompressionASTC_LDR = is_optimal_astc_supported; | 112 | features.textureCompressionASTC_LDR = is_optimal_astc_supported; |
| 114 | 113 | ||
| @@ -148,6 +147,15 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan | |||
| 148 | LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); | 147 | LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); |
| 149 | } | 148 | } |
| 150 | 149 | ||
| 150 | vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; | ||
| 151 | if (ext_transform_feedback) { | ||
| 152 | transform_feedback.transformFeedback = true; | ||
| 153 | transform_feedback.geometryStreams = true; | ||
| 154 | SetNext(next, transform_feedback); | ||
| 155 | } else { | ||
| 156 | LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks"); | ||
| 157 | } | ||
| 158 | |||
| 151 | if (!ext_depth_range_unrestricted) { | 159 | if (!ext_depth_range_unrestricted) { |
| 152 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); | 160 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); |
| 153 | } | 161 | } |
| @@ -385,7 +393,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 385 | } | 393 | } |
| 386 | }; | 394 | }; |
| 387 | 395 | ||
| 388 | extensions.reserve(14); | 396 | extensions.reserve(15); |
| 389 | extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); | 397 | extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); |
| 390 | extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); | 398 | extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); |
| 391 | extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); | 399 | extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); |
| @@ -397,18 +405,22 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 397 | 405 | ||
| 398 | [[maybe_unused]] const bool nsight = | 406 | [[maybe_unused]] const bool nsight = |
| 399 | std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); | 407 | std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); |
| 400 | bool khr_shader_float16_int8{}; | 408 | bool has_khr_shader_float16_int8{}; |
| 401 | bool ext_subgroup_size_control{}; | 409 | bool has_ext_subgroup_size_control{}; |
| 410 | bool has_ext_transform_feedback{}; | ||
| 402 | for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { | 411 | for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { |
| 403 | Test(extension, khr_uniform_buffer_standard_layout, | 412 | Test(extension, khr_uniform_buffer_standard_layout, |
| 404 | VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); | 413 | VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); |
| 405 | Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); | 414 | Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, |
| 415 | false); | ||
| 406 | Test(extension, ext_depth_range_unrestricted, | 416 | Test(extension, ext_depth_range_unrestricted, |
| 407 | VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); | 417 | VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); |
| 408 | Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); | 418 | Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); |
| 409 | Test(extension, ext_shader_viewport_index_layer, | 419 | Test(extension, ext_shader_viewport_index_layer, |
| 410 | VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); | 420 | VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); |
| 411 | Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, | 421 | Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, |
| 422 | false); | ||
| 423 | Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, | ||
| 412 | false); | 424 | false); |
| 413 | if (Settings::values.renderer_debug) { | 425 | if (Settings::values.renderer_debug) { |
| 414 | Test(extension, nv_device_diagnostic_checkpoints, | 426 | Test(extension, nv_device_diagnostic_checkpoints, |
| @@ -416,13 +428,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 416 | } | 428 | } |
| 417 | } | 429 | } |
| 418 | 430 | ||
| 419 | if (khr_shader_float16_int8) { | 431 | if (has_khr_shader_float16_int8) { |
| 420 | is_float16_supported = | 432 | is_float16_supported = |
| 421 | GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; | 433 | GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; |
| 422 | extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); | 434 | extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); |
| 423 | } | 435 | } |
| 424 | 436 | ||
| 425 | if (ext_subgroup_size_control) { | 437 | if (has_ext_subgroup_size_control) { |
| 426 | const auto features = | 438 | const auto features = |
| 427 | GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); | 439 | GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); |
| 428 | const auto properties = | 440 | const auto properties = |
| @@ -439,6 +451,20 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 439 | is_warp_potentially_bigger = true; | 451 | is_warp_potentially_bigger = true; |
| 440 | } | 452 | } |
| 441 | 453 | ||
| 454 | if (has_ext_transform_feedback) { | ||
| 455 | const auto features = | ||
| 456 | GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi); | ||
| 457 | const auto properties = | ||
| 458 | GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi); | ||
| 459 | |||
| 460 | if (features.transformFeedback && features.geometryStreams && | ||
| 461 | properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers && | ||
| 462 | properties.transformFeedbackQueries && properties.transformFeedbackDraw) { | ||
| 463 | extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); | ||
| 464 | ext_transform_feedback = true; | ||
| 465 | } | ||
| 466 | } | ||
| 467 | |||
| 442 | return extensions; | 468 | return extensions; |
| 443 | } | 469 | } |
| 444 | 470 | ||
| @@ -467,8 +493,7 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK | |||
| 467 | 493 | ||
| 468 | void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { | 494 | void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { |
| 469 | const auto supported_features{physical.getFeatures(dldi)}; | 495 | const auto supported_features{physical.getFeatures(dldi)}; |
| 470 | is_shader_storage_img_read_without_format_supported = | 496 | is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; |
| 471 | supported_features.shaderStorageImageReadWithoutFormat; | ||
| 472 | is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); | 497 | is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); |
| 473 | } | 498 | } |
| 474 | 499 | ||
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 2c27ad730..6e656517f 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h | |||
| @@ -122,11 +122,6 @@ public: | |||
| 122 | return properties.limits.maxPushConstantsSize; | 122 | return properties.limits.maxPushConstantsSize; |
| 123 | } | 123 | } |
| 124 | 124 | ||
| 125 | /// Returns true if Shader storage Image Read Without Format supported. | ||
| 126 | bool IsShaderStorageImageReadWithoutFormatSupported() const { | ||
| 127 | return is_shader_storage_img_read_without_format_supported; | ||
| 128 | } | ||
| 129 | |||
| 130 | /// Returns true if ASTC is natively supported. | 125 | /// Returns true if ASTC is natively supported. |
| 131 | bool IsOptimalAstcSupported() const { | 126 | bool IsOptimalAstcSupported() const { |
| 132 | return is_optimal_astc_supported; | 127 | return is_optimal_astc_supported; |
| @@ -147,6 +142,11 @@ public: | |||
| 147 | return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; | 142 | return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; |
| 148 | } | 143 | } |
| 149 | 144 | ||
| 145 | /// Returns true if formatless image load is supported. | ||
| 146 | bool IsFormatlessImageLoadSupported() const { | ||
| 147 | return is_formatless_image_load_supported; | ||
| 148 | } | ||
| 149 | |||
| 150 | /// Returns true if the device supports VK_EXT_scalar_block_layout. | 150 | /// Returns true if the device supports VK_EXT_scalar_block_layout. |
| 151 | bool IsKhrUniformBufferStandardLayoutSupported() const { | 151 | bool IsKhrUniformBufferStandardLayoutSupported() const { |
| 152 | return khr_uniform_buffer_standard_layout; | 152 | return khr_uniform_buffer_standard_layout; |
| @@ -167,6 +167,11 @@ public: | |||
| 167 | return ext_shader_viewport_index_layer; | 167 | return ext_shader_viewport_index_layer; |
| 168 | } | 168 | } |
| 169 | 169 | ||
| 170 | /// Returns true if the device supports VK_EXT_transform_feedback. | ||
| 171 | bool IsExtTransformFeedbackSupported() const { | ||
| 172 | return ext_transform_feedback; | ||
| 173 | } | ||
| 174 | |||
| 170 | /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. | 175 | /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. |
| 171 | bool IsNvDeviceDiagnosticCheckpoints() const { | 176 | bool IsNvDeviceDiagnosticCheckpoints() const { |
| 172 | return nv_device_diagnostic_checkpoints; | 177 | return nv_device_diagnostic_checkpoints; |
| @@ -214,26 +219,26 @@ private: | |||
| 214 | static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( | 219 | static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( |
| 215 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); | 220 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); |
| 216 | 221 | ||
| 217 | const vk::PhysicalDevice physical; ///< Physical device. | 222 | const vk::PhysicalDevice physical; ///< Physical device. |
| 218 | vk::DispatchLoaderDynamic dld; ///< Device function pointers. | 223 | vk::DispatchLoaderDynamic dld; ///< Device function pointers. |
| 219 | vk::PhysicalDeviceProperties properties; ///< Device properties. | 224 | vk::PhysicalDeviceProperties properties; ///< Device properties. |
| 220 | UniqueDevice logical; ///< Logical device. | 225 | UniqueDevice logical; ///< Logical device. |
| 221 | vk::Queue graphics_queue; ///< Main graphics queue. | 226 | vk::Queue graphics_queue; ///< Main graphics queue. |
| 222 | vk::Queue present_queue; ///< Main present queue. | 227 | vk::Queue present_queue; ///< Main present queue. |
| 223 | u32 graphics_family{}; ///< Main graphics queue family index. | 228 | u32 graphics_family{}; ///< Main graphics queue family index. |
| 224 | u32 present_family{}; ///< Main present queue family index. | 229 | u32 present_family{}; ///< Main present queue family index. |
| 225 | vk::DriverIdKHR driver_id{}; ///< Driver ID. | 230 | vk::DriverIdKHR driver_id{}; ///< Driver ID. |
| 226 | vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. | 231 | vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed |
| 227 | bool is_optimal_astc_supported{}; ///< Support for native ASTC. | 232 | bool is_optimal_astc_supported{}; ///< Support for native ASTC. |
| 228 | bool is_float16_supported{}; ///< Support for float16 arithmetics. | 233 | bool is_float16_supported{}; ///< Support for float16 arithmetics. |
| 229 | bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. | 234 | bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. |
| 235 | bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. | ||
| 230 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. | 236 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. |
| 231 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. | 237 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. |
| 232 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. | 238 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. |
| 233 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. | 239 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. |
| 240 | bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. | ||
| 234 | bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. | 241 | bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. |
| 235 | bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage | ||
| 236 | ///< image read without format | ||
| 237 | 242 | ||
| 238 | // Telemetry parameters | 243 | // Telemetry parameters |
| 239 | std::string vendor_name; ///< Device's driver name. | 244 | std::string vendor_name; ///< Device's driver name. |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 144e1e007..056ef495c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -161,8 +161,8 @@ CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stag | |||
| 161 | GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, | 161 | GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, |
| 162 | ProgramCode program_code, u32 main_offset) | 162 | ProgramCode program_code, u32 main_offset) |
| 163 | : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, | 163 | : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, |
| 164 | program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)}, | 164 | program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)}, |
| 165 | shader_ir{this->program_code, main_offset, compiler_settings, locker}, | 165 | shader_ir{this->program_code, main_offset, compiler_settings, registry}, |
| 166 | entries{GenerateShaderEntries(shader_ir)} {} | 166 | entries{GenerateShaderEntries(shader_ir)} {} |
| 167 | 167 | ||
| 168 | CachedShader::~CachedShader() = default; | 168 | CachedShader::~CachedShader() = default; |
| @@ -273,9 +273,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | |||
| 273 | specialization.workgroup_size = key.workgroup_size; | 273 | specialization.workgroup_size = key.workgroup_size; |
| 274 | specialization.shared_memory_size = key.shared_memory_size; | 274 | specialization.shared_memory_size = key.shared_memory_size; |
| 275 | 275 | ||
| 276 | const SPIRVShader spirv_shader{ | 276 | const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, |
| 277 | Decompile(device, shader->GetIR(), ShaderType::Compute, specialization), | 277 | shader->GetRegistry(), specialization), |
| 278 | shader->GetEntries()}; | 278 | shader->GetEntries()}; |
| 279 | entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, | 279 | entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, |
| 280 | update_descriptor_queue, spirv_shader); | 280 | update_descriptor_queue, spirv_shader); |
| 281 | return *entry; | 281 | return *entry; |
| @@ -324,8 +324,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 324 | const auto& gpu = system.GPU().Maxwell3D(); | 324 | const auto& gpu = system.GPU().Maxwell3D(); |
| 325 | 325 | ||
| 326 | Specialization specialization; | 326 | Specialization specialization; |
| 327 | specialization.primitive_topology = fixed_state.input_assembly.topology; | 327 | if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) { |
| 328 | if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) { | ||
| 329 | ASSERT(fixed_state.input_assembly.point_size != 0.0f); | 328 | ASSERT(fixed_state.input_assembly.point_size != 0.0f); |
| 330 | specialization.point_size = fixed_state.input_assembly.point_size; | 329 | specialization.point_size = fixed_state.input_assembly.point_size; |
| 331 | } | 330 | } |
| @@ -333,9 +332,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 333 | specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; | 332 | specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; |
| 334 | } | 333 | } |
| 335 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; | 334 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; |
| 336 | specialization.tessellation.primitive = fixed_state.tessellation.primitive; | ||
| 337 | specialization.tessellation.spacing = fixed_state.tessellation.spacing; | ||
| 338 | specialization.tessellation.clockwise = fixed_state.tessellation.clockwise; | ||
| 339 | 335 | ||
| 340 | SPIRVProgram program; | 336 | SPIRVProgram program; |
| 341 | std::vector<vk::DescriptorSetLayoutBinding> bindings; | 337 | std::vector<vk::DescriptorSetLayoutBinding> bindings; |
| @@ -356,8 +352,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 356 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 | 352 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 |
| 357 | const auto program_type = GetShaderType(program_enum); | 353 | const auto program_type = GetShaderType(program_enum); |
| 358 | const auto& entries = shader->GetEntries(); | 354 | const auto& entries = shader->GetEntries(); |
| 359 | program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization), | 355 | program[stage] = { |
| 360 | entries}; | 356 | Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), |
| 357 | entries}; | ||
| 361 | 358 | ||
| 362 | if (program_enum == Maxwell::ShaderProgram::VertexA) { | 359 | if (program_enum == Maxwell::ShaderProgram::VertexA) { |
| 363 | // VertexB was combined with VertexA, so we skip the VertexB iteration | 360 | // VertexB was combined with VertexA, so we skip the VertexB iteration |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 92a670cc7..21340c9a4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -25,7 +25,7 @@ | |||
| 25 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | 25 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" |
| 26 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 26 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 27 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 27 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 28 | #include "video_core/shader/const_buffer_locker.h" | 28 | #include "video_core/shader/registry.h" |
| 29 | #include "video_core/shader/shader_ir.h" | 29 | #include "video_core/shader/shader_ir.h" |
| 30 | #include "video_core/surface.h" | 30 | #include "video_core/surface.h" |
| 31 | 31 | ||
| @@ -132,6 +132,10 @@ public: | |||
| 132 | return shader_ir; | 132 | return shader_ir; |
| 133 | } | 133 | } |
| 134 | 134 | ||
| 135 | const VideoCommon::Shader::Registry& GetRegistry() const { | ||
| 136 | return registry; | ||
| 137 | } | ||
| 138 | |||
| 135 | const VideoCommon::Shader::ShaderIR& GetIR() const { | 139 | const VideoCommon::Shader::ShaderIR& GetIR() const { |
| 136 | return shader_ir; | 140 | return shader_ir; |
| 137 | } | 141 | } |
| @@ -147,7 +151,7 @@ private: | |||
| 147 | GPUVAddr gpu_addr{}; | 151 | GPUVAddr gpu_addr{}; |
| 148 | VAddr cpu_addr{}; | 152 | VAddr cpu_addr{}; |
| 149 | ProgramCode program_code; | 153 | ProgramCode program_code; |
| 150 | VideoCommon::Shader::ConstBufferLocker locker; | 154 | VideoCommon::Shader::Registry registry; |
| 151 | VideoCommon::Shader::ShaderIR shader_ir; | 155 | VideoCommon::Shader::ShaderIR shader_ir; |
| 152 | ShaderEntries entries; | 156 | ShaderEntries entries; |
| 153 | }; | 157 | }; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 2bcb17b56..f889019c1 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -347,6 +347,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 347 | [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); | 347 | [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); |
| 348 | } | 348 | } |
| 349 | 349 | ||
| 350 | BeginTransformFeedback(); | ||
| 351 | |||
| 350 | const auto pipeline_layout = pipeline.GetLayout(); | 352 | const auto pipeline_layout = pipeline.GetLayout(); |
| 351 | const auto descriptor_set = pipeline.CommitDescriptorSet(); | 353 | const auto descriptor_set = pipeline.CommitDescriptorSet(); |
| 352 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { | 354 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { |
| @@ -356,6 +358,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 356 | } | 358 | } |
| 357 | draw_params.Draw(cmdbuf, dld); | 359 | draw_params.Draw(cmdbuf, dld); |
| 358 | }); | 360 | }); |
| 361 | |||
| 362 | EndTransformFeedback(); | ||
| 359 | } | 363 | } |
| 360 | 364 | ||
| 361 | void RasterizerVulkan::Clear() { | 365 | void RasterizerVulkan::Clear() { |
| @@ -738,6 +742,44 @@ void RasterizerVulkan::UpdateDynamicStates() { | |||
| 738 | UpdateStencilFaces(regs); | 742 | UpdateStencilFaces(regs); |
| 739 | } | 743 | } |
| 740 | 744 | ||
| 745 | void RasterizerVulkan::BeginTransformFeedback() { | ||
| 746 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 747 | if (regs.tfb_enabled == 0) { | ||
| 748 | return; | ||
| 749 | } | ||
| 750 | |||
| 751 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | ||
| 752 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | ||
| 753 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); | ||
| 754 | |||
| 755 | UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable); | ||
| 756 | UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable); | ||
| 757 | UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable); | ||
| 758 | |||
| 759 | const auto& binding = regs.tfb_bindings[0]; | ||
| 760 | UNIMPLEMENTED_IF(binding.buffer_enable == 0); | ||
| 761 | UNIMPLEMENTED_IF(binding.buffer_offset != 0); | ||
| 762 | |||
| 763 | const GPUVAddr gpu_addr = binding.Address(); | ||
| 764 | const std::size_t size = binding.buffer_size; | ||
| 765 | const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | ||
| 766 | |||
| 767 | scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) { | ||
| 768 | cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld); | ||
| 769 | cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld); | ||
| 770 | }); | ||
| 771 | } | ||
| 772 | |||
| 773 | void RasterizerVulkan::EndTransformFeedback() { | ||
| 774 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 775 | if (regs.tfb_enabled == 0) { | ||
| 776 | return; | ||
| 777 | } | ||
| 778 | |||
| 779 | scheduler.Record( | ||
| 780 | [](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); }); | ||
| 781 | } | ||
| 782 | |||
| 741 | void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, | 783 | void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, |
| 742 | BufferBindings& buffer_bindings) { | 784 | BufferBindings& buffer_bindings) { |
| 743 | const auto& regs = system.GPU().Maxwell3D().regs; | 785 | const auto& regs = system.GPU().Maxwell3D().regs; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 96ea05f0a..b2e73d98d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -169,6 +169,10 @@ private: | |||
| 169 | 169 | ||
| 170 | void UpdateDynamicStates(); | 170 | void UpdateDynamicStates(); |
| 171 | 171 | ||
| 172 | void BeginTransformFeedback(); | ||
| 173 | |||
| 174 | void EndTransformFeedback(); | ||
| 175 | |||
| 172 | bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); | 176 | bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); |
| 173 | 177 | ||
| 174 | void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, | 178 | void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index cfcca5af0..b2c298051 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -5,7 +5,9 @@ | |||
| 5 | #include <functional> | 5 | #include <functional> |
| 6 | #include <limits> | 6 | #include <limits> |
| 7 | #include <map> | 7 | #include <map> |
| 8 | #include <optional> | ||
| 8 | #include <type_traits> | 9 | #include <type_traits> |
| 10 | #include <unordered_map> | ||
| 9 | #include <utility> | 11 | #include <utility> |
| 10 | 12 | ||
| 11 | #include <fmt/format.h> | 13 | #include <fmt/format.h> |
| @@ -24,6 +26,7 @@ | |||
| 24 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 26 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 25 | #include "video_core/shader/node.h" | 27 | #include "video_core/shader/node.h" |
| 26 | #include "video_core/shader/shader_ir.h" | 28 | #include "video_core/shader/shader_ir.h" |
| 29 | #include "video_core/shader/transform_feedback.h" | ||
| 27 | 30 | ||
| 28 | namespace Vulkan { | 31 | namespace Vulkan { |
| 29 | 32 | ||
| @@ -93,6 +96,12 @@ struct VertexIndices { | |||
| 93 | std::optional<u32> clip_distances; | 96 | std::optional<u32> clip_distances; |
| 94 | }; | 97 | }; |
| 95 | 98 | ||
| 99 | struct GenericVaryingDescription { | ||
| 100 | Id id = nullptr; | ||
| 101 | u32 first_element = 0; | ||
| 102 | bool is_scalar = false; | ||
| 103 | }; | ||
| 104 | |||
| 96 | spv::Dim GetSamplerDim(const Sampler& sampler) { | 105 | spv::Dim GetSamplerDim(const Sampler& sampler) { |
| 97 | ASSERT(!sampler.IsBuffer()); | 106 | ASSERT(!sampler.IsBuffer()); |
| 98 | switch (sampler.GetType()) { | 107 | switch (sampler.GetType()) { |
| @@ -266,9 +275,13 @@ bool IsPrecise(Operation operand) { | |||
| 266 | class SPIRVDecompiler final : public Sirit::Module { | 275 | class SPIRVDecompiler final : public Sirit::Module { |
| 267 | public: | 276 | public: |
| 268 | explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, | 277 | explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, |
| 269 | const Specialization& specialization) | 278 | const Registry& registry, const Specialization& specialization) |
| 270 | : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()}, | 279 | : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()}, |
| 271 | specialization{specialization} { | 280 | registry{registry}, specialization{specialization} { |
| 281 | if (stage != ShaderType::Compute) { | ||
| 282 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); | ||
| 283 | } | ||
| 284 | |||
| 272 | AddCapability(spv::Capability::Shader); | 285 | AddCapability(spv::Capability::Shader); |
| 273 | AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); | 286 | AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); |
| 274 | AddCapability(spv::Capability::ImageQuery); | 287 | AddCapability(spv::Capability::ImageQuery); |
| @@ -286,6 +299,15 @@ public: | |||
| 286 | AddExtension("SPV_KHR_variable_pointers"); | 299 | AddExtension("SPV_KHR_variable_pointers"); |
| 287 | AddExtension("SPV_KHR_shader_draw_parameters"); | 300 | AddExtension("SPV_KHR_shader_draw_parameters"); |
| 288 | 301 | ||
| 302 | if (!transform_feedback.empty()) { | ||
| 303 | if (device.IsExtTransformFeedbackSupported()) { | ||
| 304 | AddCapability(spv::Capability::TransformFeedback); | ||
| 305 | } else { | ||
| 306 | LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not " | ||
| 307 | "supported on this device"); | ||
| 308 | } | ||
| 309 | } | ||
| 310 | |||
| 289 | if (ir.UsesLayer() || ir.UsesViewportIndex()) { | 311 | if (ir.UsesLayer() || ir.UsesViewportIndex()) { |
| 290 | if (ir.UsesViewportIndex()) { | 312 | if (ir.UsesViewportIndex()) { |
| 291 | AddCapability(spv::Capability::MultiViewport); | 313 | AddCapability(spv::Capability::MultiViewport); |
| @@ -296,7 +318,7 @@ public: | |||
| 296 | } | 318 | } |
| 297 | } | 319 | } |
| 298 | 320 | ||
| 299 | if (device.IsShaderStorageImageReadWithoutFormatSupported()) { | 321 | if (device.IsFormatlessImageLoadSupported()) { |
| 300 | AddCapability(spv::Capability::StorageImageReadWithoutFormat); | 322 | AddCapability(spv::Capability::StorageImageReadWithoutFormat); |
| 301 | } | 323 | } |
| 302 | 324 | ||
| @@ -318,25 +340,29 @@ public: | |||
| 318 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, | 340 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, |
| 319 | header.common2.threads_per_input_primitive); | 341 | header.common2.threads_per_input_primitive); |
| 320 | break; | 342 | break; |
| 321 | case ShaderType::TesselationEval: | 343 | case ShaderType::TesselationEval: { |
| 344 | const auto& info = registry.GetGraphicsInfo(); | ||
| 322 | AddCapability(spv::Capability::Tessellation); | 345 | AddCapability(spv::Capability::Tessellation); |
| 323 | AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); | 346 | AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); |
| 324 | AddExecutionMode(main, GetExecutionMode(specialization.tessellation.primitive)); | 347 | AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive)); |
| 325 | AddExecutionMode(main, GetExecutionMode(specialization.tessellation.spacing)); | 348 | AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing)); |
| 326 | AddExecutionMode(main, specialization.tessellation.clockwise | 349 | AddExecutionMode(main, info.tessellation_clockwise |
| 327 | ? spv::ExecutionMode::VertexOrderCw | 350 | ? spv::ExecutionMode::VertexOrderCw |
| 328 | : spv::ExecutionMode::VertexOrderCcw); | 351 | : spv::ExecutionMode::VertexOrderCcw); |
| 329 | break; | 352 | break; |
| 330 | case ShaderType::Geometry: | 353 | } |
| 354 | case ShaderType::Geometry: { | ||
| 355 | const auto& info = registry.GetGraphicsInfo(); | ||
| 331 | AddCapability(spv::Capability::Geometry); | 356 | AddCapability(spv::Capability::Geometry); |
| 332 | AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); | 357 | AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); |
| 333 | AddExecutionMode(main, GetExecutionMode(specialization.primitive_topology)); | 358 | AddExecutionMode(main, GetExecutionMode(info.primitive_topology)); |
| 334 | AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); | 359 | AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); |
| 335 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, | 360 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, |
| 336 | header.common4.max_output_vertices); | 361 | header.common4.max_output_vertices); |
| 337 | // TODO(Rodrigo): Where can we get this info from? | 362 | // TODO(Rodrigo): Where can we get this info from? |
| 338 | AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); | 363 | AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); |
| 339 | break; | 364 | break; |
| 365 | } | ||
| 340 | case ShaderType::Fragment: | 366 | case ShaderType::Fragment: |
| 341 | AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); | 367 | AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); |
| 342 | AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); | 368 | AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); |
| @@ -545,7 +571,8 @@ private: | |||
| 545 | if (stage != ShaderType::Geometry) { | 571 | if (stage != ShaderType::Geometry) { |
| 546 | return; | 572 | return; |
| 547 | } | 573 | } |
| 548 | const u32 num_input = GetNumPrimitiveTopologyVertices(specialization.primitive_topology); | 574 | const auto& info = registry.GetGraphicsInfo(); |
| 575 | const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology); | ||
| 549 | DeclareInputVertexArray(num_input); | 576 | DeclareInputVertexArray(num_input); |
| 550 | DeclareOutputVertex(); | 577 | DeclareOutputVertex(); |
| 551 | } | 578 | } |
| @@ -742,12 +769,34 @@ private: | |||
| 742 | } | 769 | } |
| 743 | 770 | ||
| 744 | void DeclareOutputAttributes() { | 771 | void DeclareOutputAttributes() { |
| 772 | if (stage == ShaderType::Compute || stage == ShaderType::Fragment) { | ||
| 773 | return; | ||
| 774 | } | ||
| 775 | |||
| 776 | UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex); | ||
| 745 | for (const auto index : ir.GetOutputAttributes()) { | 777 | for (const auto index : ir.GetOutputAttributes()) { |
| 746 | if (!IsGenericAttribute(index)) { | 778 | if (!IsGenericAttribute(index)) { |
| 747 | continue; | 779 | continue; |
| 748 | } | 780 | } |
| 749 | const u32 location = GetGenericAttributeLocation(index); | 781 | DeclareOutputAttribute(index); |
| 750 | Id type = t_float4; | 782 | } |
| 783 | } | ||
| 784 | |||
| 785 | void DeclareOutputAttribute(Attribute::Index index) { | ||
| 786 | static constexpr std::string_view swizzle = "xyzw"; | ||
| 787 | |||
| 788 | const u32 location = GetGenericAttributeLocation(index); | ||
| 789 | u8 element = 0; | ||
| 790 | while (element < 4) { | ||
| 791 | const std::size_t remainder = 4 - element; | ||
| 792 | |||
| 793 | std::size_t num_components = remainder; | ||
| 794 | const std::optional tfb = GetTransformFeedbackInfo(index, element); | ||
| 795 | if (tfb) { | ||
| 796 | num_components = tfb->components; | ||
| 797 | } | ||
| 798 | |||
| 799 | Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1); | ||
| 751 | Id varying_default = v_varying_default; | 800 | Id varying_default = v_varying_default; |
| 752 | if (IsOutputAttributeArray()) { | 801 | if (IsOutputAttributeArray()) { |
| 753 | const u32 num = GetNumOutputVertices(); | 802 | const u32 num = GetNumOutputVertices(); |
| @@ -760,13 +809,45 @@ private: | |||
| 760 | } | 809 | } |
| 761 | type = TypePointer(spv::StorageClass::Output, type); | 810 | type = TypePointer(spv::StorageClass::Output, type); |
| 762 | 811 | ||
| 812 | std::string name = fmt::format("out_attr{}", location); | ||
| 813 | if (num_components < 4 || element > 0) { | ||
| 814 | name = fmt::format("{}_{}", name, swizzle.substr(element, num_components)); | ||
| 815 | } | ||
| 816 | |||
| 763 | const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); | 817 | const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); |
| 764 | Name(AddGlobalVariable(id), fmt::format("out_attr{}", location)); | 818 | Name(AddGlobalVariable(id), name); |
| 765 | output_attributes.emplace(index, id); | 819 | |
| 820 | GenericVaryingDescription description; | ||
| 821 | description.id = id; | ||
| 822 | description.first_element = element; | ||
| 823 | description.is_scalar = num_components == 1; | ||
| 824 | for (u32 i = 0; i < num_components; ++i) { | ||
| 825 | const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i); | ||
| 826 | output_attributes.emplace(offset, description); | ||
| 827 | } | ||
| 766 | interfaces.push_back(id); | 828 | interfaces.push_back(id); |
| 767 | 829 | ||
| 768 | Decorate(id, spv::Decoration::Location, location); | 830 | Decorate(id, spv::Decoration::Location, location); |
| 831 | if (element > 0) { | ||
| 832 | Decorate(id, spv::Decoration::Component, static_cast<u32>(element)); | ||
| 833 | } | ||
| 834 | if (tfb && device.IsExtTransformFeedbackSupported()) { | ||
| 835 | Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer)); | ||
| 836 | Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride)); | ||
| 837 | Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset)); | ||
| 838 | } | ||
| 839 | |||
| 840 | element += static_cast<u8>(num_components); | ||
| 841 | } | ||
| 842 | } | ||
| 843 | |||
| 844 | std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) { | ||
| 845 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 846 | const auto it = transform_feedback.find(location); | ||
| 847 | if (it == transform_feedback.end()) { | ||
| 848 | return {}; | ||
| 769 | } | 849 | } |
| 850 | return it->second; | ||
| 770 | } | 851 | } |
| 771 | 852 | ||
| 772 | u32 DeclareConstantBuffers(u32 binding) { | 853 | u32 DeclareConstantBuffers(u32 binding) { |
| @@ -898,7 +979,7 @@ private: | |||
| 898 | u32 GetNumInputVertices() const { | 979 | u32 GetNumInputVertices() const { |
| 899 | switch (stage) { | 980 | switch (stage) { |
| 900 | case ShaderType::Geometry: | 981 | case ShaderType::Geometry: |
| 901 | return GetNumPrimitiveTopologyVertices(specialization.primitive_topology); | 982 | return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology); |
| 902 | case ShaderType::TesselationControl: | 983 | case ShaderType::TesselationControl: |
| 903 | case ShaderType::TesselationEval: | 984 | case ShaderType::TesselationEval: |
| 904 | return NumInputPatches; | 985 | return NumInputPatches; |
| @@ -1346,8 +1427,14 @@ private: | |||
| 1346 | } | 1427 | } |
| 1347 | default: | 1428 | default: |
| 1348 | if (IsGenericAttribute(attribute)) { | 1429 | if (IsGenericAttribute(attribute)) { |
| 1349 | const Id composite = output_attributes.at(attribute); | 1430 | const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element); |
| 1350 | return {ArrayPass(t_out_float, composite, {element}), Type::Float}; | 1431 | const GenericVaryingDescription description = output_attributes.at(offset); |
| 1432 | const Id composite = description.id; | ||
| 1433 | std::vector<u32> indices; | ||
| 1434 | if (!description.is_scalar) { | ||
| 1435 | indices.push_back(element - description.first_element); | ||
| 1436 | } | ||
| 1437 | return {ArrayPass(t_out_float, composite, indices), Type::Float}; | ||
| 1351 | } | 1438 | } |
| 1352 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", | 1439 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", |
| 1353 | static_cast<u32>(attribute)); | 1440 | static_cast<u32>(attribute)); |
| @@ -1793,7 +1880,7 @@ private: | |||
| 1793 | } | 1880 | } |
| 1794 | 1881 | ||
| 1795 | Expression ImageLoad(Operation operation) { | 1882 | Expression ImageLoad(Operation operation) { |
| 1796 | if (!device.IsShaderStorageImageReadWithoutFormatSupported()) { | 1883 | if (!device.IsFormatlessImageLoadSupported()) { |
| 1797 | return {v_float_zero, Type::Float}; | 1884 | return {v_float_zero, Type::Float}; |
| 1798 | } | 1885 | } |
| 1799 | 1886 | ||
| @@ -2258,11 +2345,11 @@ private: | |||
| 2258 | std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { | 2345 | std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { |
| 2259 | switch (type) { | 2346 | switch (type) { |
| 2260 | case Type::Float: | 2347 | case Type::Float: |
| 2261 | return {nullptr, t_float2, t_float3, t_float4}; | 2348 | return {t_float, t_float2, t_float3, t_float4}; |
| 2262 | case Type::Int: | 2349 | case Type::Int: |
| 2263 | return {nullptr, t_int2, t_int3, t_int4}; | 2350 | return {t_int, t_int2, t_int3, t_int4}; |
| 2264 | case Type::Uint: | 2351 | case Type::Uint: |
| 2265 | return {nullptr, t_uint2, t_uint3, t_uint4}; | 2352 | return {t_uint, t_uint2, t_uint3, t_uint4}; |
| 2266 | default: | 2353 | default: |
| 2267 | UNIMPLEMENTED(); | 2354 | UNIMPLEMENTED(); |
| 2268 | return {}; | 2355 | return {}; |
| @@ -2495,7 +2582,9 @@ private: | |||
| 2495 | const ShaderIR& ir; | 2582 | const ShaderIR& ir; |
| 2496 | const ShaderType stage; | 2583 | const ShaderType stage; |
| 2497 | const Tegra::Shader::Header header; | 2584 | const Tegra::Shader::Header header; |
| 2585 | const Registry& registry; | ||
| 2498 | const Specialization& specialization; | 2586 | const Specialization& specialization; |
| 2587 | std::unordered_map<u8, VaryingTFB> transform_feedback; | ||
| 2499 | 2588 | ||
| 2500 | const Id t_void = Name(TypeVoid(), "void"); | 2589 | const Id t_void = Name(TypeVoid(), "void"); |
| 2501 | 2590 | ||
| @@ -2584,7 +2673,7 @@ private: | |||
| 2584 | Id shared_memory{}; | 2673 | Id shared_memory{}; |
| 2585 | std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; | 2674 | std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; |
| 2586 | std::map<Attribute::Index, Id> input_attributes; | 2675 | std::map<Attribute::Index, Id> input_attributes; |
| 2587 | std::map<Attribute::Index, Id> output_attributes; | 2676 | std::unordered_map<u8, GenericVaryingDescription> output_attributes; |
| 2588 | std::map<u32, Id> constant_buffers; | 2677 | std::map<u32, Id> constant_buffers; |
| 2589 | std::map<GlobalMemoryBase, Id> global_buffers; | 2678 | std::map<GlobalMemoryBase, Id> global_buffers; |
| 2590 | std::map<u32, TexelBuffer> texel_buffers; | 2679 | std::map<u32, TexelBuffer> texel_buffers; |
| @@ -2870,8 +2959,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 2870 | } | 2959 | } |
| 2871 | 2960 | ||
| 2872 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, | 2961 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, |
| 2873 | ShaderType stage, const Specialization& specialization) { | 2962 | ShaderType stage, const VideoCommon::Shader::Registry& registry, |
| 2874 | return SPIRVDecompiler(device, ir, stage, specialization).Assemble(); | 2963 | const Specialization& specialization) { |
| 2964 | return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); | ||
| 2875 | } | 2965 | } |
| 2876 | 2966 | ||
| 2877 | } // namespace Vulkan | 2967 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index f5dc14d9e..ffea4709e 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "video_core/engines/maxwell_3d.h" | 16 | #include "video_core/engines/maxwell_3d.h" |
| 17 | #include "video_core/engines/shader_type.h" | 17 | #include "video_core/engines/shader_type.h" |
| 18 | #include "video_core/shader/registry.h" | ||
| 18 | #include "video_core/shader/shader_ir.h" | 19 | #include "video_core/shader/shader_ir.h" |
| 19 | 20 | ||
| 20 | namespace Vulkan { | 21 | namespace Vulkan { |
| @@ -91,17 +92,9 @@ struct Specialization final { | |||
| 91 | u32 shared_memory_size{}; | 92 | u32 shared_memory_size{}; |
| 92 | 93 | ||
| 93 | // Graphics specific | 94 | // Graphics specific |
| 94 | Maxwell::PrimitiveTopology primitive_topology{}; | ||
| 95 | std::optional<float> point_size{}; | 95 | std::optional<float> point_size{}; |
| 96 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; | 96 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; |
| 97 | bool ndc_minus_one_to_one{}; | 97 | bool ndc_minus_one_to_one{}; |
| 98 | |||
| 99 | // Tessellation specific | ||
| 100 | struct { | ||
| 101 | Maxwell::TessellationPrimitive primitive{}; | ||
| 102 | Maxwell::TessellationSpacing spacing{}; | ||
| 103 | bool clockwise{}; | ||
| 104 | } tessellation; | ||
| 105 | }; | 98 | }; |
| 106 | // Old gcc versions don't consider this trivially copyable. | 99 | // Old gcc versions don't consider this trivially copyable. |
| 107 | // static_assert(std::is_trivially_copyable_v<Specialization>); | 100 | // static_assert(std::is_trivially_copyable_v<Specialization>); |
| @@ -114,6 +107,8 @@ struct SPIRVShader { | |||
| 114 | ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); | 107 | ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); |
| 115 | 108 | ||
| 116 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, | 109 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, |
| 117 | Tegra::Engines::ShaderType stage, const Specialization& specialization); | 110 | Tegra::Engines::ShaderType stage, |
| 111 | const VideoCommon::Shader::Registry& registry, | ||
| 112 | const Specialization& specialization); | ||
| 118 | 113 | ||
| 119 | } // namespace Vulkan | 114 | } // namespace Vulkan |
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp deleted file mode 100644 index 0638be8cb..000000000 --- a/src/video_core/shader/const_buffer_locker.cpp +++ /dev/null | |||
| @@ -1,126 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <tuple> | ||
| 7 | |||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 10 | #include "video_core/engines/shader_type.h" | ||
| 11 | #include "video_core/shader/const_buffer_locker.h" | ||
| 12 | |||
| 13 | namespace VideoCommon::Shader { | ||
| 14 | |||
| 15 | using Tegra::Engines::SamplerDescriptor; | ||
| 16 | |||
| 17 | ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) | ||
| 18 | : stage{shader_stage} {} | ||
| 19 | |||
| 20 | ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, | ||
| 21 | Tegra::Engines::ConstBufferEngineInterface& engine) | ||
| 22 | : stage{shader_stage}, engine{&engine} {} | ||
| 23 | |||
| 24 | ConstBufferLocker::~ConstBufferLocker() = default; | ||
| 25 | |||
| 26 | std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { | ||
| 27 | const std::pair<u32, u32> key = {buffer, offset}; | ||
| 28 | const auto iter = keys.find(key); | ||
| 29 | if (iter != keys.end()) { | ||
| 30 | return iter->second; | ||
| 31 | } | ||
| 32 | if (!engine) { | ||
| 33 | return std::nullopt; | ||
| 34 | } | ||
| 35 | const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); | ||
| 36 | keys.emplace(key, value); | ||
| 37 | return value; | ||
| 38 | } | ||
| 39 | |||
| 40 | std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) { | ||
| 41 | const u32 key = offset; | ||
| 42 | const auto iter = bound_samplers.find(key); | ||
| 43 | if (iter != bound_samplers.end()) { | ||
| 44 | return iter->second; | ||
| 45 | } | ||
| 46 | if (!engine) { | ||
| 47 | return std::nullopt; | ||
| 48 | } | ||
| 49 | const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); | ||
| 50 | bound_samplers.emplace(key, value); | ||
| 51 | return value; | ||
| 52 | } | ||
| 53 | |||
| 54 | std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler( | ||
| 55 | u32 buffer, u32 offset) { | ||
| 56 | const std::pair key = {buffer, offset}; | ||
| 57 | const auto iter = bindless_samplers.find(key); | ||
| 58 | if (iter != bindless_samplers.end()) { | ||
| 59 | return iter->second; | ||
| 60 | } | ||
| 61 | if (!engine) { | ||
| 62 | return std::nullopt; | ||
| 63 | } | ||
| 64 | const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); | ||
| 65 | bindless_samplers.emplace(key, value); | ||
| 66 | return value; | ||
| 67 | } | ||
| 68 | |||
| 69 | std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() { | ||
| 70 | if (bound_buffer_saved) { | ||
| 71 | return bound_buffer; | ||
| 72 | } | ||
| 73 | if (!engine) { | ||
| 74 | return std::nullopt; | ||
| 75 | } | ||
| 76 | bound_buffer_saved = true; | ||
| 77 | bound_buffer = engine->GetBoundBuffer(); | ||
| 78 | return bound_buffer; | ||
| 79 | } | ||
| 80 | |||
| 81 | void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { | ||
| 82 | keys.insert_or_assign({buffer, offset}, value); | ||
| 83 | } | ||
| 84 | |||
| 85 | void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { | ||
| 86 | bound_samplers.insert_or_assign(offset, sampler); | ||
| 87 | } | ||
| 88 | |||
| 89 | void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { | ||
| 90 | bindless_samplers.insert_or_assign({buffer, offset}, sampler); | ||
| 91 | } | ||
| 92 | |||
| 93 | void ConstBufferLocker::SetBoundBuffer(u32 buffer) { | ||
| 94 | bound_buffer_saved = true; | ||
| 95 | bound_buffer = buffer; | ||
| 96 | } | ||
| 97 | |||
| 98 | bool ConstBufferLocker::IsConsistent() const { | ||
| 99 | if (!engine) { | ||
| 100 | return false; | ||
| 101 | } | ||
| 102 | return std::all_of(keys.begin(), keys.end(), | ||
| 103 | [this](const auto& pair) { | ||
| 104 | const auto [cbuf, offset] = pair.first; | ||
| 105 | const auto value = pair.second; | ||
| 106 | return value == engine->AccessConstBuffer32(stage, cbuf, offset); | ||
| 107 | }) && | ||
| 108 | std::all_of(bound_samplers.begin(), bound_samplers.end(), | ||
| 109 | [this](const auto& sampler) { | ||
| 110 | const auto [key, value] = sampler; | ||
| 111 | return value == engine->AccessBoundSampler(stage, key); | ||
| 112 | }) && | ||
| 113 | std::all_of(bindless_samplers.begin(), bindless_samplers.end(), | ||
| 114 | [this](const auto& sampler) { | ||
| 115 | const auto [cbuf, offset] = sampler.first; | ||
| 116 | const auto value = sampler.second; | ||
| 117 | return value == engine->AccessBindlessSampler(stage, cbuf, offset); | ||
| 118 | }); | ||
| 119 | } | ||
| 120 | |||
| 121 | bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const { | ||
| 122 | return std::tie(keys, bound_samplers, bindless_samplers) == | ||
| 123 | std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); | ||
| 124 | } | ||
| 125 | |||
| 126 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h deleted file mode 100644 index d3ea11087..000000000 --- a/src/video_core/shader/const_buffer_locker.h +++ /dev/null | |||
| @@ -1,103 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <optional> | ||
| 8 | #include <unordered_map> | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "common/hash.h" | ||
| 11 | #include "video_core/engines/const_buffer_engine_interface.h" | ||
| 12 | #include "video_core/engines/shader_type.h" | ||
| 13 | #include "video_core/guest_driver.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; | ||
| 18 | using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; | ||
| 19 | using BindlessSamplerMap = | ||
| 20 | std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; | ||
| 21 | |||
| 22 | /** | ||
| 23 | * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader | ||
| 24 | * compiler. with it, the shader can obtain required data from GPU state and store it for disk | ||
| 25 | * shader compilation. | ||
| 26 | */ | ||
| 27 | class ConstBufferLocker { | ||
| 28 | public: | ||
| 29 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); | ||
| 30 | |||
| 31 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, | ||
| 32 | Tegra::Engines::ConstBufferEngineInterface& engine); | ||
| 33 | |||
| 34 | ~ConstBufferLocker(); | ||
| 35 | |||
| 36 | /// Retrieves a key from the locker, if it's registered, it will give the registered value, if | ||
| 37 | /// not it will obtain it from maxwell3d and register it. | ||
| 38 | std::optional<u32> ObtainKey(u32 buffer, u32 offset); | ||
| 39 | |||
| 40 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); | ||
| 41 | |||
| 42 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); | ||
| 43 | |||
| 44 | std::optional<u32> ObtainBoundBuffer(); | ||
| 45 | |||
| 46 | /// Inserts a key. | ||
| 47 | void InsertKey(u32 buffer, u32 offset, u32 value); | ||
| 48 | |||
| 49 | /// Inserts a bound sampler key. | ||
| 50 | void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 51 | |||
| 52 | /// Inserts a bindless sampler key. | ||
| 53 | void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 54 | |||
| 55 | /// Set the bound buffer for this locker. | ||
| 56 | void SetBoundBuffer(u32 buffer); | ||
| 57 | |||
| 58 | /// Checks keys and samplers against engine's current const buffers. Returns true if they are | ||
| 59 | /// the same value, false otherwise; | ||
| 60 | bool IsConsistent() const; | ||
| 61 | |||
| 62 | /// Returns true if the keys are equal to the other ones in the locker. | ||
| 63 | bool HasEqualKeys(const ConstBufferLocker& rhs) const; | ||
| 64 | |||
| 65 | /// Gives an getter to the const buffer keys in the database. | ||
| 66 | const KeyMap& GetKeys() const { | ||
| 67 | return keys; | ||
| 68 | } | ||
| 69 | |||
| 70 | /// Gets samplers database. | ||
| 71 | const BoundSamplerMap& GetBoundSamplers() const { | ||
| 72 | return bound_samplers; | ||
| 73 | } | ||
| 74 | |||
| 75 | /// Gets bindless samplers database. | ||
| 76 | const BindlessSamplerMap& GetBindlessSamplers() const { | ||
| 77 | return bindless_samplers; | ||
| 78 | } | ||
| 79 | |||
| 80 | /// Gets bound buffer used on this shader | ||
| 81 | u32 GetBoundBuffer() const { | ||
| 82 | return bound_buffer; | ||
| 83 | } | ||
| 84 | |||
| 85 | /// Obtains access to the guest driver's profile. | ||
| 86 | VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const { | ||
| 87 | if (engine) { | ||
| 88 | return &engine->AccessGuestDriverProfile(); | ||
| 89 | } | ||
| 90 | return nullptr; | ||
| 91 | } | ||
| 92 | |||
| 93 | private: | ||
| 94 | const Tegra::Engines::ShaderType stage; | ||
| 95 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; | ||
| 96 | KeyMap keys; | ||
| 97 | BoundSamplerMap bound_samplers; | ||
| 98 | BindlessSamplerMap bindless_samplers; | ||
| 99 | bool bound_buffer_saved{}; | ||
| 100 | u32 bound_buffer{}; | ||
| 101 | }; | ||
| 102 | |||
| 103 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 0229733b6..2e2711350 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 14 | #include "video_core/shader/ast.h" | 14 | #include "video_core/shader/ast.h" |
| 15 | #include "video_core/shader/control_flow.h" | 15 | #include "video_core/shader/control_flow.h" |
| 16 | #include "video_core/shader/registry.h" | ||
| 16 | #include "video_core/shader/shader_ir.h" | 17 | #include "video_core/shader/shader_ir.h" |
| 17 | 18 | ||
| 18 | namespace VideoCommon::Shader { | 19 | namespace VideoCommon::Shader { |
| @@ -64,11 +65,11 @@ struct BlockInfo { | |||
| 64 | }; | 65 | }; |
| 65 | 66 | ||
| 66 | struct CFGRebuildState { | 67 | struct CFGRebuildState { |
| 67 | explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) | 68 | explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& registry) |
| 68 | : program_code{program_code}, locker{locker}, start{start} {} | 69 | : program_code{program_code}, registry{registry}, start{start} {} |
| 69 | 70 | ||
| 70 | const ProgramCode& program_code; | 71 | const ProgramCode& program_code; |
| 71 | ConstBufferLocker& locker; | 72 | Registry& registry; |
| 72 | u32 start{}; | 73 | u32 start{}; |
| 73 | std::vector<BlockInfo> block_info; | 74 | std::vector<BlockInfo> block_info; |
| 74 | std::list<u32> inspect_queries; | 75 | std::list<u32> inspect_queries; |
| @@ -438,7 +439,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) | |||
| 438 | const s32 pc_target = offset + result.relative_position; | 439 | const s32 pc_target = offset + result.relative_position; |
| 439 | std::vector<CaseBranch> branches; | 440 | std::vector<CaseBranch> branches; |
| 440 | for (u32 i = 0; i < result.entries; i++) { | 441 | for (u32 i = 0; i < result.entries; i++) { |
| 441 | auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4); | 442 | auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4); |
| 442 | if (!key) { | 443 | if (!key) { |
| 443 | return {ParseResult::AbnormalFlow, parse_info}; | 444 | return {ParseResult::AbnormalFlow, parse_info}; |
| 444 | } | 445 | } |
| @@ -656,14 +657,14 @@ void DecompileShader(CFGRebuildState& state) { | |||
| 656 | 657 | ||
| 657 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, | 658 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, |
| 658 | const CompilerSettings& settings, | 659 | const CompilerSettings& settings, |
| 659 | ConstBufferLocker& locker) { | 660 | Registry& registry) { |
| 660 | auto result_out = std::make_unique<ShaderCharacteristics>(); | 661 | auto result_out = std::make_unique<ShaderCharacteristics>(); |
| 661 | if (settings.depth == CompileDepth::BruteForce) { | 662 | if (settings.depth == CompileDepth::BruteForce) { |
| 662 | result_out->settings.depth = CompileDepth::BruteForce; | 663 | result_out->settings.depth = CompileDepth::BruteForce; |
| 663 | return result_out; | 664 | return result_out; |
| 664 | } | 665 | } |
| 665 | 666 | ||
| 666 | CFGRebuildState state{program_code, start_address, locker}; | 667 | CFGRebuildState state{program_code, start_address, registry}; |
| 667 | // Inspect Code and generate blocks | 668 | // Inspect Code and generate blocks |
| 668 | state.labels.clear(); | 669 | state.labels.clear(); |
| 669 | state.labels.emplace(start_address); | 670 | state.labels.emplace(start_address); |
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 5304998b9..62a3510d8 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "video_core/engines/shader_bytecode.h" | 12 | #include "video_core/engines/shader_bytecode.h" |
| 13 | #include "video_core/shader/ast.h" | 13 | #include "video_core/shader/ast.h" |
| 14 | #include "video_core/shader/compiler_settings.h" | 14 | #include "video_core/shader/compiler_settings.h" |
| 15 | #include "video_core/shader/registry.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | 16 | #include "video_core/shader/shader_ir.h" |
| 16 | 17 | ||
| 17 | namespace VideoCommon::Shader { | 18 | namespace VideoCommon::Shader { |
| @@ -111,6 +112,6 @@ struct ShaderCharacteristics { | |||
| 111 | 112 | ||
| 112 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, | 113 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, |
| 113 | const CompilerSettings& settings, | 114 | const CompilerSettings& settings, |
| 114 | ConstBufferLocker& locker); | 115 | Registry& registry); |
| 115 | 116 | ||
| 116 | } // namespace VideoCommon::Shader | 117 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 6b697ed5d..87ac9ac6c 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -34,13 +34,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | |||
| 34 | return (absolute_offset % SchedPeriod) == 0; | 34 | return (absolute_offset % SchedPeriod) == 0; |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, | 37 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, |
| 38 | const std::list<Sampler>& used_samplers) { | 38 | const std::list<Sampler>& used_samplers) { |
| 39 | if (gpu_driver == nullptr) { | 39 | if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { |
| 40 | LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet"); | ||
| 41 | return; | ||
| 42 | } | ||
| 43 | if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { | ||
| 44 | return; | 40 | return; |
| 45 | } | 41 | } |
| 46 | u32 count{}; | 42 | u32 count{}; |
| @@ -53,17 +49,13 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, | |||
| 53 | bound_offsets.emplace_back(sampler.GetOffset()); | 49 | bound_offsets.emplace_back(sampler.GetOffset()); |
| 54 | } | 50 | } |
| 55 | if (count > 1) { | 51 | if (count > 1) { |
| 56 | gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); | 52 | gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); |
| 57 | } | 53 | } |
| 58 | } | 54 | } |
| 59 | 55 | ||
| 60 | std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, | 56 | std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, |
| 61 | VideoCore::GuestDriverProfile* gpu_driver, | 57 | VideoCore::GuestDriverProfile& gpu_driver, |
| 62 | const std::list<Sampler>& used_samplers) { | 58 | const std::list<Sampler>& used_samplers) { |
| 63 | if (gpu_driver == nullptr) { | ||
| 64 | LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); | ||
| 65 | return std::nullopt; | ||
| 66 | } | ||
| 67 | const u32 base_offset = sampler_to_deduce.GetOffset(); | 59 | const u32 base_offset = sampler_to_deduce.GetOffset(); |
| 68 | u32 max_offset{std::numeric_limits<u32>::max()}; | 60 | u32 max_offset{std::numeric_limits<u32>::max()}; |
| 69 | for (const auto& sampler : used_samplers) { | 61 | for (const auto& sampler : used_samplers) { |
| @@ -77,7 +69,7 @@ std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, | |||
| 77 | if (max_offset == std::numeric_limits<u32>::max()) { | 69 | if (max_offset == std::numeric_limits<u32>::max()) { |
| 78 | return std::nullopt; | 70 | return std::nullopt; |
| 79 | } | 71 | } |
| 80 | return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); | 72 | return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); |
| 81 | } | 73 | } |
| 82 | 74 | ||
| 83 | } // Anonymous namespace | 75 | } // Anonymous namespace |
| @@ -149,7 +141,7 @@ void ShaderIR::Decode() { | |||
| 149 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | 141 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); |
| 150 | 142 | ||
| 151 | decompiled = false; | 143 | decompiled = false; |
| 152 | auto info = ScanFlow(program_code, main_offset, settings, locker); | 144 | auto info = ScanFlow(program_code, main_offset, settings, registry); |
| 153 | auto& shader_info = *info; | 145 | auto& shader_info = *info; |
| 154 | coverage_begin = shader_info.start; | 146 | coverage_begin = shader_info.start; |
| 155 | coverage_end = shader_info.end; | 147 | coverage_end = shader_info.end; |
| @@ -364,7 +356,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | |||
| 364 | 356 | ||
| 365 | void ShaderIR::PostDecode() { | 357 | void ShaderIR::PostDecode() { |
| 366 | // Deduce texture handler size if needed | 358 | // Deduce texture handler size if needed |
| 367 | auto gpu_driver = locker.AccessGuestDriverProfile(); | 359 | auto gpu_driver = registry.AccessGuestDriverProfile(); |
| 368 | DeduceTextureHandlerSize(gpu_driver, used_samplers); | 360 | DeduceTextureHandlerSize(gpu_driver, used_samplers); |
| 369 | // Deduce Indexed Samplers | 361 | // Deduce Indexed Samplers |
| 370 | if (!uses_indexed_samplers) { | 362 | if (!uses_indexed_samplers) { |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index bee7d8cad..48350e042 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "common/logging/log.h" | 12 | #include "common/logging/log.h" |
| 13 | #include "video_core/engines/shader_bytecode.h" | 13 | #include "video_core/engines/shader_bytecode.h" |
| 14 | #include "video_core/shader/node_helper.h" | 14 | #include "video_core/shader/node_helper.h" |
| 15 | #include "video_core/shader/registry.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | 16 | #include "video_core/shader/shader_ir.h" |
| 16 | 17 | ||
| 17 | namespace VideoCommon::Shader { | 18 | namespace VideoCommon::Shader { |
| @@ -359,8 +360,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sample | |||
| 359 | if (sampler_info) { | 360 | if (sampler_info) { |
| 360 | return *sampler_info; | 361 | return *sampler_info; |
| 361 | } | 362 | } |
| 362 | const auto sampler = | 363 | const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset) |
| 363 | buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset); | 364 | : registry.ObtainBoundSampler(offset); |
| 364 | if (!sampler) { | 365 | if (!sampler) { |
| 365 | LOG_WARNING(HW_GPU, "Unknown sampler info"); | 366 | LOG_WARNING(HW_GPU, "Unknown sampler info"); |
| 366 | return SamplerInfo{TextureType::Texture2D, false, false, false}; | 367 | return SamplerInfo{TextureType::Texture2D, false, false, false}; |
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp new file mode 100644 index 000000000..af70b3f35 --- /dev/null +++ b/src/video_core/shader/registry.cpp | |||
| @@ -0,0 +1,161 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <tuple> | ||
| 7 | |||
| 8 | #include "common/assert.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/engines/kepler_compute.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/engines/shader_type.h" | ||
| 13 | #include "video_core/shader/registry.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | using Tegra::Engines::ConstBufferEngineInterface; | ||
| 18 | using Tegra::Engines::SamplerDescriptor; | ||
| 19 | using Tegra::Engines::ShaderType; | ||
| 20 | |||
| 21 | namespace { | ||
| 22 | |||
| 23 | GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { | ||
| 24 | if (shader_stage == ShaderType::Compute) { | ||
| 25 | return {}; | ||
| 26 | } | ||
| 27 | auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine); | ||
| 28 | |||
| 29 | GraphicsInfo info; | ||
| 30 | info.tfb_layouts = graphics.regs.tfb_layouts; | ||
| 31 | info.tfb_varying_locs = graphics.regs.tfb_varying_locs; | ||
| 32 | info.primitive_topology = graphics.regs.draw.topology; | ||
| 33 | info.tessellation_primitive = graphics.regs.tess_mode.prim; | ||
| 34 | info.tessellation_spacing = graphics.regs.tess_mode.spacing; | ||
| 35 | info.tfb_enabled = graphics.regs.tfb_enabled; | ||
| 36 | info.tessellation_clockwise = graphics.regs.tess_mode.cw; | ||
| 37 | return info; | ||
| 38 | } | ||
| 39 | |||
| 40 | ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { | ||
| 41 | if (shader_stage != ShaderType::Compute) { | ||
| 42 | return {}; | ||
| 43 | } | ||
| 44 | auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine); | ||
| 45 | const auto& launch = compute.launch_description; | ||
| 46 | |||
| 47 | ComputeInfo info; | ||
| 48 | info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}; | ||
| 49 | info.local_memory_size_in_words = launch.local_pos_alloc; | ||
| 50 | info.shared_memory_size_in_words = launch.shared_alloc; | ||
| 51 | return info; | ||
| 52 | } | ||
| 53 | |||
| 54 | } // Anonymous namespace | ||
| 55 | |||
| 56 | Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info) | ||
| 57 | : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, | ||
| 58 | bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} | ||
| 59 | |||
| 60 | Registry::Registry(Tegra::Engines::ShaderType shader_stage, | ||
| 61 | Tegra::Engines::ConstBufferEngineInterface& engine) | ||
| 62 | : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()}, | ||
| 63 | graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo( | ||
| 64 | shader_stage, engine)} {} | ||
| 65 | |||
| 66 | Registry::~Registry() = default; | ||
| 67 | |||
| 68 | std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) { | ||
| 69 | const std::pair<u32, u32> key = {buffer, offset}; | ||
| 70 | const auto iter = keys.find(key); | ||
| 71 | if (iter != keys.end()) { | ||
| 72 | return iter->second; | ||
| 73 | } | ||
| 74 | if (!engine) { | ||
| 75 | return std::nullopt; | ||
| 76 | } | ||
| 77 | const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); | ||
| 78 | keys.emplace(key, value); | ||
| 79 | return value; | ||
| 80 | } | ||
| 81 | |||
| 82 | std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) { | ||
| 83 | const u32 key = offset; | ||
| 84 | const auto iter = bound_samplers.find(key); | ||
| 85 | if (iter != bound_samplers.end()) { | ||
| 86 | return iter->second; | ||
| 87 | } | ||
| 88 | if (!engine) { | ||
| 89 | return std::nullopt; | ||
| 90 | } | ||
| 91 | const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); | ||
| 92 | bound_samplers.emplace(key, value); | ||
| 93 | return value; | ||
| 94 | } | ||
| 95 | |||
| 96 | std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, | ||
| 97 | u32 offset) { | ||
| 98 | const std::pair key = {buffer, offset}; | ||
| 99 | const auto iter = bindless_samplers.find(key); | ||
| 100 | if (iter != bindless_samplers.end()) { | ||
| 101 | return iter->second; | ||
| 102 | } | ||
| 103 | if (!engine) { | ||
| 104 | return std::nullopt; | ||
| 105 | } | ||
| 106 | const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); | ||
| 107 | bindless_samplers.emplace(key, value); | ||
| 108 | return value; | ||
| 109 | } | ||
| 110 | |||
| 111 | void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { | ||
| 112 | keys.insert_or_assign({buffer, offset}, value); | ||
| 113 | } | ||
| 114 | |||
| 115 | void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { | ||
| 116 | bound_samplers.insert_or_assign(offset, sampler); | ||
| 117 | } | ||
| 118 | |||
| 119 | void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { | ||
| 120 | bindless_samplers.insert_or_assign({buffer, offset}, sampler); | ||
| 121 | } | ||
| 122 | |||
| 123 | bool Registry::IsConsistent() const { | ||
| 124 | if (!engine) { | ||
| 125 | return true; | ||
| 126 | } | ||
| 127 | return std::all_of(keys.begin(), keys.end(), | ||
| 128 | [this](const auto& pair) { | ||
| 129 | const auto [cbuf, offset] = pair.first; | ||
| 130 | const auto value = pair.second; | ||
| 131 | return value == engine->AccessConstBuffer32(stage, cbuf, offset); | ||
| 132 | }) && | ||
| 133 | std::all_of(bound_samplers.begin(), bound_samplers.end(), | ||
| 134 | [this](const auto& sampler) { | ||
| 135 | const auto [key, value] = sampler; | ||
| 136 | return value == engine->AccessBoundSampler(stage, key); | ||
| 137 | }) && | ||
| 138 | std::all_of(bindless_samplers.begin(), bindless_samplers.end(), | ||
| 139 | [this](const auto& sampler) { | ||
| 140 | const auto [cbuf, offset] = sampler.first; | ||
| 141 | const auto value = sampler.second; | ||
| 142 | return value == engine->AccessBindlessSampler(stage, cbuf, offset); | ||
| 143 | }); | ||
| 144 | } | ||
| 145 | |||
| 146 | bool Registry::HasEqualKeys(const Registry& rhs) const { | ||
| 147 | return std::tie(keys, bound_samplers, bindless_samplers) == | ||
| 148 | std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); | ||
| 149 | } | ||
| 150 | |||
| 151 | const GraphicsInfo& Registry::GetGraphicsInfo() const { | ||
| 152 | ASSERT(stage != Tegra::Engines::ShaderType::Compute); | ||
| 153 | return graphics_info; | ||
| 154 | } | ||
| 155 | |||
| 156 | const ComputeInfo& Registry::GetComputeInfo() const { | ||
| 157 | ASSERT(stage == Tegra::Engines::ShaderType::Compute); | ||
| 158 | return compute_info; | ||
| 159 | } | ||
| 160 | |||
| 161 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h new file mode 100644 index 000000000..0c80d35fd --- /dev/null +++ b/src/video_core/shader/registry.h | |||
| @@ -0,0 +1,137 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <optional> | ||
| 9 | #include <type_traits> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <utility> | ||
| 12 | |||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "common/hash.h" | ||
| 15 | #include "video_core/engines/const_buffer_engine_interface.h" | ||
| 16 | #include "video_core/engines/maxwell_3d.h" | ||
| 17 | #include "video_core/engines/shader_type.h" | ||
| 18 | #include "video_core/guest_driver.h" | ||
| 19 | |||
| 20 | namespace VideoCommon::Shader { | ||
| 21 | |||
| 22 | using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; | ||
| 23 | using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; | ||
| 24 | using BindlessSamplerMap = | ||
| 25 | std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; | ||
| 26 | |||
| 27 | struct GraphicsInfo { | ||
| 28 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 29 | |||
| 30 | std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers> | ||
| 31 | tfb_layouts{}; | ||
| 32 | std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{}; | ||
| 33 | Maxwell::PrimitiveTopology primitive_topology{}; | ||
| 34 | Maxwell::TessellationPrimitive tessellation_primitive{}; | ||
| 35 | Maxwell::TessellationSpacing tessellation_spacing{}; | ||
| 36 | bool tfb_enabled = false; | ||
| 37 | bool tessellation_clockwise = false; | ||
| 38 | }; | ||
| 39 | static_assert(std::is_trivially_copyable_v<GraphicsInfo> && | ||
| 40 | std::is_standard_layout_v<GraphicsInfo>); | ||
| 41 | |||
| 42 | struct ComputeInfo { | ||
| 43 | std::array<u32, 3> workgroup_size{}; | ||
| 44 | u32 shared_memory_size_in_words = 0; | ||
| 45 | u32 local_memory_size_in_words = 0; | ||
| 46 | }; | ||
| 47 | static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>); | ||
| 48 | |||
| 49 | struct SerializedRegistryInfo { | ||
| 50 | VideoCore::GuestDriverProfile guest_driver_profile; | ||
| 51 | u32 bound_buffer = 0; | ||
| 52 | GraphicsInfo graphics; | ||
| 53 | ComputeInfo compute; | ||
| 54 | }; | ||
| 55 | |||
| 56 | /** | ||
| 57 | * The Registry is a class use to interface the 3D and compute engines with the shader compiler. | ||
| 58 | * With it, the shader can obtain required data from GPU state and store it for disk shader | ||
| 59 | * compilation. | ||
| 60 | */ | ||
| 61 | class Registry { | ||
| 62 | public: | ||
| 63 | explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); | ||
| 64 | |||
| 65 | explicit Registry(Tegra::Engines::ShaderType shader_stage, | ||
| 66 | Tegra::Engines::ConstBufferEngineInterface& engine); | ||
| 67 | |||
| 68 | ~Registry(); | ||
| 69 | |||
| 70 | /// Retrieves a key from the registry, if it's registered, it will give the registered value, if | ||
| 71 | /// not it will obtain it from maxwell3d and register it. | ||
| 72 | std::optional<u32> ObtainKey(u32 buffer, u32 offset); | ||
| 73 | |||
| 74 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); | ||
| 75 | |||
| 76 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); | ||
| 77 | |||
| 78 | /// Inserts a key. | ||
| 79 | void InsertKey(u32 buffer, u32 offset, u32 value); | ||
| 80 | |||
| 81 | /// Inserts a bound sampler key. | ||
| 82 | void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 83 | |||
| 84 | /// Inserts a bindless sampler key. | ||
| 85 | void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 86 | |||
| 87 | /// Checks keys and samplers against engine's current const buffers. | ||
| 88 | /// Returns true if they are the same value, false otherwise. | ||
| 89 | bool IsConsistent() const; | ||
| 90 | |||
| 91 | /// Returns true if the keys are equal to the other ones in the registry. | ||
| 92 | bool HasEqualKeys(const Registry& rhs) const; | ||
| 93 | |||
| 94 | /// Returns graphics information from this shader | ||
| 95 | const GraphicsInfo& GetGraphicsInfo() const; | ||
| 96 | |||
| 97 | /// Returns compute information from this shader | ||
| 98 | const ComputeInfo& GetComputeInfo() const; | ||
| 99 | |||
| 100 | /// Gives an getter to the const buffer keys in the database. | ||
| 101 | const KeyMap& GetKeys() const { | ||
| 102 | return keys; | ||
| 103 | } | ||
| 104 | |||
| 105 | /// Gets samplers database. | ||
| 106 | const BoundSamplerMap& GetBoundSamplers() const { | ||
| 107 | return bound_samplers; | ||
| 108 | } | ||
| 109 | |||
| 110 | /// Gets bindless samplers database. | ||
| 111 | const BindlessSamplerMap& GetBindlessSamplers() const { | ||
| 112 | return bindless_samplers; | ||
| 113 | } | ||
| 114 | |||
| 115 | /// Gets bound buffer used on this shader | ||
| 116 | u32 GetBoundBuffer() const { | ||
| 117 | return bound_buffer; | ||
| 118 | } | ||
| 119 | |||
| 120 | /// Obtains access to the guest driver's profile. | ||
| 121 | VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { | ||
| 122 | return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; | ||
| 123 | } | ||
| 124 | |||
| 125 | private: | ||
| 126 | const Tegra::Engines::ShaderType stage; | ||
| 127 | VideoCore::GuestDriverProfile stored_guest_driver_profile; | ||
| 128 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; | ||
| 129 | KeyMap keys; | ||
| 130 | BoundSamplerMap bound_samplers; | ||
| 131 | BindlessSamplerMap bindless_samplers; | ||
| 132 | u32 bound_buffer; | ||
| 133 | GraphicsInfo graphics_info; | ||
| 134 | ComputeInfo compute_info; | ||
| 135 | }; | ||
| 136 | |||
| 137 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 3a5d280a9..425927777 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "common/logging/log.h" | 11 | #include "common/logging/log.h" |
| 12 | #include "video_core/engines/shader_bytecode.h" | 12 | #include "video_core/engines/shader_bytecode.h" |
| 13 | #include "video_core/shader/node_helper.h" | 13 | #include "video_core/shader/node_helper.h" |
| 14 | #include "video_core/shader/registry.h" | ||
| 14 | #include "video_core/shader/shader_ir.h" | 15 | #include "video_core/shader/shader_ir.h" |
| 15 | 16 | ||
| 16 | namespace VideoCommon::Shader { | 17 | namespace VideoCommon::Shader { |
| @@ -24,8 +25,8 @@ using Tegra::Shader::PredOperation; | |||
| 24 | using Tegra::Shader::Register; | 25 | using Tegra::Shader::Register; |
| 25 | 26 | ||
| 26 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, | 27 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, |
| 27 | ConstBufferLocker& locker) | 28 | Registry& registry) |
| 28 | : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { | 29 | : program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} { |
| 29 | Decode(); | 30 | Decode(); |
| 30 | PostDecode(); | 31 | PostDecode(); |
| 31 | } | 32 | } |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index b0851c3be..dde036b40 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -18,8 +18,8 @@ | |||
| 18 | #include "video_core/engines/shader_header.h" | 18 | #include "video_core/engines/shader_header.h" |
| 19 | #include "video_core/shader/ast.h" | 19 | #include "video_core/shader/ast.h" |
| 20 | #include "video_core/shader/compiler_settings.h" | 20 | #include "video_core/shader/compiler_settings.h" |
| 21 | #include "video_core/shader/const_buffer_locker.h" | ||
| 22 | #include "video_core/shader/node.h" | 21 | #include "video_core/shader/node.h" |
| 22 | #include "video_core/shader/registry.h" | ||
| 23 | 23 | ||
| 24 | namespace VideoCommon::Shader { | 24 | namespace VideoCommon::Shader { |
| 25 | 25 | ||
| @@ -69,7 +69,7 @@ struct GlobalMemoryUsage { | |||
| 69 | class ShaderIR final { | 69 | class ShaderIR final { |
| 70 | public: | 70 | public: |
| 71 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, | 71 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, |
| 72 | ConstBufferLocker& locker); | 72 | Registry& registry); |
| 73 | ~ShaderIR(); | 73 | ~ShaderIR(); |
| 74 | 74 | ||
| 75 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { | 75 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { |
| @@ -414,7 +414,7 @@ private: | |||
| 414 | const ProgramCode& program_code; | 414 | const ProgramCode& program_code; |
| 415 | const u32 main_offset; | 415 | const u32 main_offset; |
| 416 | const CompilerSettings settings; | 416 | const CompilerSettings settings; |
| 417 | ConstBufferLocker& locker; | 417 | Registry& registry; |
| 418 | 418 | ||
| 419 | bool decompiled{}; | 419 | bool decompiled{}; |
| 420 | bool disable_flow_stack{}; | 420 | bool disable_flow_stack{}; |
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 15e22b9fa..10739b37d 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp | |||
| @@ -81,26 +81,20 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons | |||
| 81 | MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); | 81 | MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); |
| 82 | return {tracked, track}; | 82 | return {tracked, track}; |
| 83 | } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { | 83 | } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { |
| 84 | auto bound_buffer = locker.ObtainBoundBuffer(); | 84 | const u32 bound_buffer = registry.GetBoundBuffer(); |
| 85 | if (!bound_buffer) { | 85 | if (bound_buffer != cbuf->GetIndex()) { |
| 86 | return {}; | 86 | return {}; |
| 87 | } | 87 | } |
| 88 | if (*bound_buffer != cbuf->GetIndex()) { | 88 | const auto pair = DecoupleIndirectRead(*operation); |
| 89 | return {}; | ||
| 90 | } | ||
| 91 | auto pair = DecoupleIndirectRead(*operation); | ||
| 92 | if (!pair) { | 89 | if (!pair) { |
| 93 | return {}; | 90 | return {}; |
| 94 | } | 91 | } |
| 95 | auto [gpr, base_offset] = *pair; | 92 | auto [gpr, base_offset] = *pair; |
| 96 | const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); | 93 | const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); |
| 97 | auto gpu_driver = locker.AccessGuestDriverProfile(); | 94 | const auto& gpu_driver = registry.AccessGuestDriverProfile(); |
| 98 | if (gpu_driver == nullptr) { | ||
| 99 | return {}; | ||
| 100 | } | ||
| 101 | const u32 bindless_cv = NewCustomVariable(); | 95 | const u32 bindless_cv = NewCustomVariable(); |
| 102 | const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, | 96 | const Node op = |
| 103 | Immediate(gpu_driver->GetTextureHandlerSize())); | 97 | Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize())); |
| 104 | 98 | ||
| 105 | const Node cv_node = GetCustomVariable(bindless_cv); | 99 | const Node cv_node = GetCustomVariable(bindless_cv); |
| 106 | Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); | 100 | Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); |
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp new file mode 100644 index 000000000..22a933761 --- /dev/null +++ b/src/video_core/shader/transform_feedback.cpp | |||
| @@ -0,0 +1,115 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <unordered_map> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/shader/registry.h" | ||
| 13 | #include "video_core/shader/transform_feedback.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | namespace { | ||
| 18 | |||
| 19 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 20 | |||
| 21 | // TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20 | ||
| 22 | |||
| 23 | /// Attribute offsets that describe a vector | ||
| 24 | constexpr std::array VECTORS = { | ||
| 25 | 28, // gl_Position | ||
| 26 | 32, // Generic 0 | ||
| 27 | 36, // Generic 1 | ||
| 28 | 40, // Generic 2 | ||
| 29 | 44, // Generic 3 | ||
| 30 | 48, // Generic 4 | ||
| 31 | 52, // Generic 5 | ||
| 32 | 56, // Generic 6 | ||
| 33 | 60, // Generic 7 | ||
| 34 | 64, // Generic 8 | ||
| 35 | 68, // Generic 9 | ||
| 36 | 72, // Generic 10 | ||
| 37 | 76, // Generic 11 | ||
| 38 | 80, // Generic 12 | ||
| 39 | 84, // Generic 13 | ||
| 40 | 88, // Generic 14 | ||
| 41 | 92, // Generic 15 | ||
| 42 | 96, // Generic 16 | ||
| 43 | 100, // Generic 17 | ||
| 44 | 104, // Generic 18 | ||
| 45 | 108, // Generic 19 | ||
| 46 | 112, // Generic 20 | ||
| 47 | 116, // Generic 21 | ||
| 48 | 120, // Generic 22 | ||
| 49 | 124, // Generic 23 | ||
| 50 | 128, // Generic 24 | ||
| 51 | 132, // Generic 25 | ||
| 52 | 136, // Generic 26 | ||
| 53 | 140, // Generic 27 | ||
| 54 | 144, // Generic 28 | ||
| 55 | 148, // Generic 29 | ||
| 56 | 152, // Generic 30 | ||
| 57 | 156, // Generic 31 | ||
| 58 | 160, // gl_FrontColor | ||
| 59 | 164, // gl_FrontSecondaryColor | ||
| 60 | 160, // gl_BackColor | ||
| 61 | 164, // gl_BackSecondaryColor | ||
| 62 | 192, // gl_TexCoord[0] | ||
| 63 | 196, // gl_TexCoord[1] | ||
| 64 | 200, // gl_TexCoord[2] | ||
| 65 | 204, // gl_TexCoord[3] | ||
| 66 | 208, // gl_TexCoord[4] | ||
| 67 | 212, // gl_TexCoord[5] | ||
| 68 | 216, // gl_TexCoord[6] | ||
| 69 | 220, // gl_TexCoord[7] | ||
| 70 | }; | ||
| 71 | } // namespace | ||
| 72 | |||
| 73 | std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) { | ||
| 74 | |||
| 75 | std::unordered_map<u8, VaryingTFB> tfb; | ||
| 76 | |||
| 77 | for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { | ||
| 78 | const auto& locations = info.tfb_varying_locs[buffer]; | ||
| 79 | const auto& layout = info.tfb_layouts[buffer]; | ||
| 80 | const std::size_t varying_count = layout.varying_count; | ||
| 81 | |||
| 82 | std::size_t highest = 0; | ||
| 83 | |||
| 84 | for (std::size_t offset = 0; offset < varying_count; ++offset) { | ||
| 85 | const std::size_t base_offset = offset; | ||
| 86 | const u8 location = locations[offset]; | ||
| 87 | |||
| 88 | VaryingTFB varying; | ||
| 89 | varying.buffer = layout.stream; | ||
| 90 | varying.stride = layout.stride; | ||
| 91 | varying.offset = offset * sizeof(u32); | ||
| 92 | varying.components = 1; | ||
| 93 | |||
| 94 | if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) { | ||
| 95 | UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); | ||
| 96 | |||
| 97 | const u8 base_index = location / 4; | ||
| 98 | while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { | ||
| 99 | ++offset; | ||
| 100 | ++varying.components; | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second; | ||
| 105 | UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored"); | ||
| 106 | |||
| 107 | highest = std::max(highest, (base_offset + varying.components) * sizeof(u32)); | ||
| 108 | } | ||
| 109 | |||
| 110 | UNIMPLEMENTED_IF(highest != layout.stride); | ||
| 111 | } | ||
| 112 | return tfb; | ||
| 113 | } | ||
| 114 | |||
| 115 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h new file mode 100644 index 000000000..77d05f64c --- /dev/null +++ b/src/video_core/shader/transform_feedback.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <unordered_map> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/shader/registry.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | struct VaryingTFB { | ||
| 15 | std::size_t buffer; | ||
| 16 | std::size_t stride; | ||
| 17 | std::size_t offset; | ||
| 18 | std::size_t components; | ||
| 19 | }; | ||
| 20 | |||
| 21 | std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info); | ||
| 22 | |||
| 23 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index f00839313..9931c5ef7 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp | |||
| @@ -113,8 +113,10 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta | |||
| 113 | params.height = tic.Height(); | 113 | params.height = tic.Height(); |
| 114 | params.depth = tic.Depth(); | 114 | params.depth = tic.Depth(); |
| 115 | params.pitch = params.is_tiled ? 0 : tic.Pitch(); | 115 | params.pitch = params.is_tiled ? 0 : tic.Pitch(); |
| 116 | if (params.target == SurfaceTarget::TextureCubemap || | 116 | if (params.target == SurfaceTarget::Texture2D && params.depth > 1) { |
| 117 | params.target == SurfaceTarget::TextureCubeArray) { | 117 | params.depth = 1; |
| 118 | } else if (params.target == SurfaceTarget::TextureCubemap || | ||
| 119 | params.target == SurfaceTarget::TextureCubeArray) { | ||
| 118 | params.depth *= 6; | 120 | params.depth *= 6; |
| 119 | } | 121 | } |
| 120 | params.num_levels = tic.max_mip_level + 1; | 122 | params.num_levels = tic.max_mip_level + 1; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 51373b687..6cdbe63d0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -104,6 +104,11 @@ public: | |||
| 104 | if (!cache_addr) { | 104 | if (!cache_addr) { |
| 105 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | 105 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); |
| 106 | } | 106 | } |
| 107 | |||
| 108 | if (!IsTypeCompatible(tic.texture_type, entry)) { | ||
| 109 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||
| 110 | } | ||
| 111 | |||
| 107 | const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; | 112 | const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; |
| 108 | const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); | 113 | const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); |
| 109 | if (guard_samplers) { | 114 | if (guard_samplers) { |
| @@ -914,13 +919,15 @@ private: | |||
| 914 | params.width = 1; | 919 | params.width = 1; |
| 915 | params.height = 1; | 920 | params.height = 1; |
| 916 | params.depth = 1; | 921 | params.depth = 1; |
| 922 | if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) { | ||
| 923 | params.depth = 6; | ||
| 924 | } | ||
| 917 | params.pitch = 4; | 925 | params.pitch = 4; |
| 918 | params.num_levels = 1; | 926 | params.num_levels = 1; |
| 919 | params.emulated_levels = 1; | 927 | params.emulated_levels = 1; |
| 920 | params.pixel_format = VideoCore::Surface::PixelFormat::RGBA16F; | 928 | params.pixel_format = VideoCore::Surface::PixelFormat::R8U; |
| 921 | params.type = VideoCore::Surface::SurfaceType::ColorTexture; | 929 | params.type = VideoCore::Surface::SurfaceType::ColorTexture; |
| 922 | auto surface = CreateSurface(0ULL, params); | 930 | auto surface = CreateSurface(0ULL, params); |
| 923 | invalid_memory.clear(); | ||
| 924 | invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); | 931 | invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); |
| 925 | surface->UploadTexture(invalid_memory); | 932 | surface->UploadTexture(invalid_memory); |
| 926 | surface->MarkAsModified(false, Tick()); | 933 | surface->MarkAsModified(false, Tick()); |
| @@ -1082,6 +1089,36 @@ private: | |||
| 1082 | return siblings_table[static_cast<std::size_t>(format)]; | 1089 | return siblings_table[static_cast<std::size_t>(format)]; |
| 1083 | } | 1090 | } |
| 1084 | 1091 | ||
| 1092 | /// Returns true the shader sampler entry is compatible with the TIC texture type. | ||
| 1093 | static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type, | ||
| 1094 | const VideoCommon::Shader::Sampler& entry) { | ||
| 1095 | const auto shader_type = entry.GetType(); | ||
| 1096 | switch (tic_type) { | ||
| 1097 | case Tegra::Texture::TextureType::Texture1D: | ||
| 1098 | case Tegra::Texture::TextureType::Texture1DArray: | ||
| 1099 | return shader_type == Tegra::Shader::TextureType::Texture1D; | ||
| 1100 | case Tegra::Texture::TextureType::Texture1DBuffer: | ||
| 1101 | // TODO(Rodrigo): Assume as valid for now | ||
| 1102 | return true; | ||
| 1103 | case Tegra::Texture::TextureType::Texture2D: | ||
| 1104 | case Tegra::Texture::TextureType::Texture2DNoMipmap: | ||
| 1105 | return shader_type == Tegra::Shader::TextureType::Texture2D; | ||
| 1106 | case Tegra::Texture::TextureType::Texture2DArray: | ||
| 1107 | return shader_type == Tegra::Shader::TextureType::Texture2D || | ||
| 1108 | shader_type == Tegra::Shader::TextureType::TextureCube; | ||
| 1109 | case Tegra::Texture::TextureType::Texture3D: | ||
| 1110 | return shader_type == Tegra::Shader::TextureType::Texture3D; | ||
| 1111 | case Tegra::Texture::TextureType::TextureCubeArray: | ||
| 1112 | case Tegra::Texture::TextureType::TextureCubemap: | ||
| 1113 | if (shader_type == Tegra::Shader::TextureType::TextureCube) { | ||
| 1114 | return true; | ||
| 1115 | } | ||
| 1116 | return shader_type == Tegra::Shader::TextureType::Texture2D && entry.IsArray(); | ||
| 1117 | } | ||
| 1118 | UNREACHABLE(); | ||
| 1119 | return true; | ||
| 1120 | } | ||
| 1121 | |||
| 1085 | struct FramebufferTargetInfo { | 1122 | struct FramebufferTargetInfo { |
| 1086 | TSurface target; | 1123 | TSurface target; |
| 1087 | TView view; | 1124 | TView view; |
diff --git a/src/yuzu/loading_screen.cpp b/src/yuzu/loading_screen.cpp index 4f2bfab48..2a6483370 100644 --- a/src/yuzu/loading_screen.cpp +++ b/src/yuzu/loading_screen.cpp | |||
| @@ -34,18 +34,6 @@ constexpr char PROGRESSBAR_STYLE_PREPARE[] = R"( | |||
| 34 | QProgressBar {} | 34 | QProgressBar {} |
| 35 | QProgressBar::chunk {})"; | 35 | QProgressBar::chunk {})"; |
| 36 | 36 | ||
| 37 | constexpr char PROGRESSBAR_STYLE_DECOMPILE[] = R"( | ||
| 38 | QProgressBar { | ||
| 39 | background-color: black; | ||
| 40 | border: 2px solid white; | ||
| 41 | border-radius: 4px; | ||
| 42 | padding: 2px; | ||
| 43 | } | ||
| 44 | QProgressBar::chunk { | ||
| 45 | background-color: #0ab9e6; | ||
| 46 | width: 1px; | ||
| 47 | })"; | ||
| 48 | |||
| 49 | constexpr char PROGRESSBAR_STYLE_BUILD[] = R"( | 37 | constexpr char PROGRESSBAR_STYLE_BUILD[] = R"( |
| 50 | QProgressBar { | 38 | QProgressBar { |
| 51 | background-color: black; | 39 | background-color: black; |
| @@ -100,13 +88,11 @@ LoadingScreen::LoadingScreen(QWidget* parent) | |||
| 100 | 88 | ||
| 101 | stage_translations = { | 89 | stage_translations = { |
| 102 | {VideoCore::LoadCallbackStage::Prepare, tr("Loading...")}, | 90 | {VideoCore::LoadCallbackStage::Prepare, tr("Loading...")}, |
| 103 | {VideoCore::LoadCallbackStage::Decompile, tr("Preparing Shaders %1 / %2")}, | ||
| 104 | {VideoCore::LoadCallbackStage::Build, tr("Loading Shaders %1 / %2")}, | 91 | {VideoCore::LoadCallbackStage::Build, tr("Loading Shaders %1 / %2")}, |
| 105 | {VideoCore::LoadCallbackStage::Complete, tr("Launching...")}, | 92 | {VideoCore::LoadCallbackStage::Complete, tr("Launching...")}, |
| 106 | }; | 93 | }; |
| 107 | progressbar_style = { | 94 | progressbar_style = { |
| 108 | {VideoCore::LoadCallbackStage::Prepare, PROGRESSBAR_STYLE_PREPARE}, | 95 | {VideoCore::LoadCallbackStage::Prepare, PROGRESSBAR_STYLE_PREPARE}, |
| 109 | {VideoCore::LoadCallbackStage::Decompile, PROGRESSBAR_STYLE_DECOMPILE}, | ||
| 110 | {VideoCore::LoadCallbackStage::Build, PROGRESSBAR_STYLE_BUILD}, | 96 | {VideoCore::LoadCallbackStage::Build, PROGRESSBAR_STYLE_BUILD}, |
| 111 | {VideoCore::LoadCallbackStage::Complete, PROGRESSBAR_STYLE_COMPLETE}, | 97 | {VideoCore::LoadCallbackStage::Complete, PROGRESSBAR_STYLE_COMPLETE}, |
| 112 | }; | 98 | }; |
| @@ -192,8 +178,7 @@ void LoadingScreen::OnLoadProgress(VideoCore::LoadCallbackStage stage, std::size | |||
| 192 | } | 178 | } |
| 193 | 179 | ||
| 194 | // update labels and progress bar | 180 | // update labels and progress bar |
| 195 | if (stage == VideoCore::LoadCallbackStage::Decompile || | 181 | if (stage == VideoCore::LoadCallbackStage::Build) { |
| 196 | stage == VideoCore::LoadCallbackStage::Build) { | ||
| 197 | ui->stage->setText(stage_translations[stage].arg(value).arg(total)); | 182 | ui->stage->setText(stage_translations[stage].arg(value).arg(total)); |
| 198 | } else { | 183 | } else { |
| 199 | ui->stage->setText(stage_translations[stage]); | 184 | ui->stage->setText(stage_translations[stage]); |