Diffstat (limited to 'src')
71 files changed, 2658 insertions, 2401 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 9afc6105d..fbebed715 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -38,8 +38,6 @@ add_custom_command(OUTPUT scm_rev.cpp
     "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
     "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
     "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
-    "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
-    "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
     "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
     "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
     "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
@@ -72,8 +70,6 @@ add_custom_command(OUTPUT scm_rev.cpp
     "${VIDEO_CORE}/shader/ast.h"
     "${VIDEO_CORE}/shader/compiler_settings.cpp"
     "${VIDEO_CORE}/shader/compiler_settings.h"
-    "${VIDEO_CORE}/shader/const_buffer_locker.cpp"
-    "${VIDEO_CORE}/shader/const_buffer_locker.h"
     "${VIDEO_CORE}/shader/control_flow.cpp"
     "${VIDEO_CORE}/shader/control_flow.h"
     "${VIDEO_CORE}/shader/decode.cpp"
@@ -82,9 +78,13 @@ add_custom_command(OUTPUT scm_rev.cpp
     "${VIDEO_CORE}/shader/node.h"
     "${VIDEO_CORE}/shader/node_helper.cpp"
     "${VIDEO_CORE}/shader/node_helper.h"
+    "${VIDEO_CORE}/shader/registry.cpp"
+    "${VIDEO_CORE}/shader/registry.h"
     "${VIDEO_CORE}/shader/shader_ir.cpp"
     "${VIDEO_CORE}/shader/shader_ir.h"
     "${VIDEO_CORE}/shader/track.cpp"
+    "${VIDEO_CORE}/shader/transform_feedback.cpp"
+    "${VIDEO_CORE}/shader/transform_feedback.h"
     # and also check that the scm_rev files haven't changed
     "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
     "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"
diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp
index 69b7abc54..566b57b62 100644
--- a/src/common/page_table.cpp
+++ b/src/common/page_table.cpp
@@ -16,7 +16,6 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) {
 
     pointers.resize(num_page_table_entries);
     attributes.resize(num_page_table_entries);
-    backing_addr.resize(num_page_table_entries);
 
     // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
     // vector size is subsequently decreased (via resize), the vector might not automatically
@@ -25,6 +24,17 @@ void PageTable::Resize(std::size_t address_space_width_in_bits) {
 
     pointers.shrink_to_fit();
     attributes.shrink_to_fit();
+}
+
+BackingPageTable::BackingPageTable(std::size_t page_size_in_bits) : PageTable{page_size_in_bits} {}
+
+BackingPageTable::~BackingPageTable() = default;
+
+void BackingPageTable::Resize(std::size_t address_space_width_in_bits) {
+    PageTable::Resize(address_space_width_in_bits);
+    const std::size_t num_page_table_entries = 1ULL
+                                               << (address_space_width_in_bits - page_size_in_bits);
+    backing_addr.resize(num_page_table_entries);
     backing_addr.shrink_to_fit();
 }
 
diff --git a/src/common/page_table.h b/src/common/page_table.h
index 8b8ff0bb8..dbc272ab7 100644
--- a/src/common/page_table.h
+++ b/src/common/page_table.h
@@ -76,9 +76,20 @@ struct PageTable {
      */
     std::vector<PageType> attributes;
 
-    std::vector<u64> backing_addr;
-
     const std::size_t page_size_in_bits{};
 };
 
+/**
+ * A more advanced Page Table with the ability to save a backing address when using it
+ * depends on another MMU.
+ */
+struct BackingPageTable : PageTable {
+    explicit BackingPageTable(std::size_t page_size_in_bits);
+    ~BackingPageTable();
+
+    void Resize(std::size_t address_space_width_in_bits);
+
+    std::vector<u64> backing_addr;
+};
+
 } // namespace Common
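The new BackingPageTable only has to keep its backing_addr array in step with the entry count that the base Resize() derives from the address-space width. A minimal, self-contained sketch of that relationship, with illustrative names rather than the Common:: types above:

// Sketch only: a page table whose backing-address array mirrors the
// entry count derived from the address-space width on every resize.
#include <cstddef>
#include <cstdint>
#include <vector>

struct PageTableSketch {
    explicit PageTableSketch(std::size_t page_bits) : page_size_in_bits{page_bits} {}

    void Resize(std::size_t address_space_width_in_bits) {
        const std::size_t num_entries = 1ULL
                                        << (address_space_width_in_bits - page_size_in_bits);
        pointers.resize(num_entries);
        attributes.resize(num_entries);
    }

    std::vector<std::uint8_t*> pointers;
    std::vector<std::uint8_t> attributes;
    const std::size_t page_size_in_bits;
};

struct BackingPageTableSketch : PageTableSketch {
    using PageTableSketch::PageTableSketch;

    void Resize(std::size_t address_space_width_in_bits) {
        PageTableSketch::Resize(address_space_width_in_bits);
        // One backing address per page-table entry.
        backing_addr.resize(pointers.size());
    }

    std::vector<std::uint64_t> backing_addr;
};

int main() {
    BackingPageTableSketch table{12}; // 4 KiB pages, purely as an example
    table.Resize(24);                 // toy 24-bit address space
    return table.backing_addr.size() == table.pointers.size() ? 0 : 1;
}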
diff --git a/src/core/hle/service/time/time_zone_content_manager.cpp b/src/core/hle/service/time/time_zone_content_manager.cpp
index 57b1a2bca..78d4acd95 100644
--- a/src/core/hle/service/time/time_zone_content_manager.cpp
+++ b/src/core/hle/service/time/time_zone_content_manager.cpp
@@ -53,7 +53,7 @@ static std::vector<std::string> BuildLocationNameCache(Core::System& system) {
         return {};
     }
 
-    std::vector<char> raw_data(binary_list->GetSize());
+    std::vector<char> raw_data(binary_list->GetSize() + 1);
     binary_list->ReadBytes<char>(raw_data.data(), binary_list->GetSize());
 
     std::stringstream data_stream{raw_data.data()};
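Presumably the extra byte is there because data_stream is built from raw_data.data() as a C string: std::vector value-initializes its elements, so sizing the buffer one past the payload leaves a trailing '\0' for the std::stringstream constructor to stop at. A small stand-alone illustration (the payload and names are made up):

#include <cassert>
#include <sstream>
#include <string>
#include <vector>

int main() {
    const std::string payload = "Europe/Berlin\nAmerica/New_York\n"; // made-up zone list

    // Over-allocate by one; the extra element is value-initialized to '\0'.
    std::vector<char> raw_data(payload.size() + 1);
    payload.copy(raw_data.data(), payload.size());

    // Constructing from a char* relies on the terminator the extra byte provides.
    std::stringstream data_stream{raw_data.data()};

    std::string line;
    std::getline(data_stream, line);
    assert(line == "Europe/Berlin");
    return 0;
}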
diff --git a/src/core/settings.h b/src/core/settings.h
index cb5979e6f..12e2cc9e7 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -387,6 +387,7 @@ struct Values {
 
     s32 current_user;
     s32 language_index;
+    s32 sound_index;
 
     // Controls
     std::array<PlayerInput, 10> players;
diff --git a/src/input_common/udp/udp.cpp b/src/input_common/udp/udp.cpp
index ca99cc22f..8c6ef1394 100644
--- a/src/input_common/udp/udp.cpp
+++ b/src/input_common/udp/udp.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <mutex>
+#include <optional>
 #include <tuple>
 
 #include "common/param_package.h"
@@ -44,7 +45,7 @@ public:
     std::unique_ptr<Input::TouchDevice> Create(const Common::ParamPackage& params) override {
         {
             std::lock_guard guard(status->update_mutex);
-            status->touch_calibration.emplace();
+            status->touch_calibration = DeviceStatus::CalibrationData{};
             // These default values work well for DS4 but probably not other touch inputs
             status->touch_calibration->min_x = params.Get("min_x", 100);
             status->touch_calibration->min_y = params.Get("min_y", 50);
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 14f3b4569..91df062d7 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -65,8 +65,6 @@ add_library(video_core STATIC
     renderer_opengl/gl_shader_decompiler.h
     renderer_opengl/gl_shader_disk_cache.cpp
     renderer_opengl/gl_shader_disk_cache.h
-    renderer_opengl/gl_shader_gen.cpp
-    renderer_opengl/gl_shader_gen.h
     renderer_opengl/gl_shader_manager.cpp
     renderer_opengl/gl_shader_manager.h
     renderer_opengl/gl_shader_util.cpp
@@ -118,8 +116,6 @@ add_library(video_core STATIC
    shader/ast.h
    shader/compiler_settings.cpp
    shader/compiler_settings.h
-    shader/const_buffer_locker.cpp
-    shader/const_buffer_locker.h
    shader/control_flow.cpp
    shader/control_flow.h
    shader/decode.cpp
@@ -128,9 +124,13 @@ add_library(video_core STATIC
    shader/node_helper.cpp
    shader/node_helper.h
    shader/node.h
+    shader/registry.cpp
+    shader/registry.h
    shader/shader_ir.cpp
    shader/shader_ir.h
    shader/track.cpp
+    shader/transform_feedback.cpp
+    shader/transform_feedback.h
    surface.cpp
    surface.h
    texture_cache/format_lookup_table.cpp
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp
index 4429f3405..e16075993 100644
--- a/src/video_core/dirty_flags.cpp
+++ b/src/video_core/dirty_flags.cpp
@@ -15,14 +15,6 @@ namespace VideoCommon::Dirty {
 
 using Tegra::Engines::Maxwell3D;
 
-void SetupCommonOnWriteStores(Tegra::Engines::Maxwell3D::DirtyState::Flags& store) {
-    store[RenderTargets] = true;
-    store[ZetaBuffer] = true;
-    for (std::size_t i = 0; i < Maxwell3D::Regs::NumRenderTargets; ++i) {
-        store[ColorBuffer0 + i] = true;
-    }
-}
-
 void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
     static constexpr std::size_t num_per_rt = NUM(rt[0]);
     static constexpr std::size_t begin = OFF(rt);
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h
index 0dbafd3ef..3f6c1d83a 100644
--- a/src/video_core/dirty_flags.h
+++ b/src/video_core/dirty_flags.h
@@ -44,8 +44,6 @@ void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_
     FillBlock(tables[1], begin, num, index_b);
 }
 
-void SetupCommonOnWriteStores(Tegra::Engines::Maxwell3D::DirtyState::Flags& store);
-
 void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables);
 
 } // namespace VideoCommon::Dirty
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index d56a47710..724ee0fd6 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -16,11 +16,12 @@ namespace Tegra::Engines {
 
 struct SamplerDescriptor {
     union {
-        BitField<0, 20, Tegra::Shader::TextureType> texture_type;
-        BitField<20, 1, u32> is_array;
-        BitField<21, 1, u32> is_buffer;
-        BitField<22, 1, u32> is_shadow;
-        u32 raw{};
+        u32 raw = 0;
+        BitField<0, 2, Tegra::Shader::TextureType> texture_type;
+        BitField<2, 3, Tegra::Texture::ComponentType> component_type;
+        BitField<5, 1, u32> is_array;
+        BitField<6, 1, u32> is_buffer;
+        BitField<7, 1, u32> is_shadow;
     };
 
     bool operator==(const SamplerDescriptor& rhs) const noexcept {
@@ -31,68 +32,48 @@ struct SamplerDescriptor {
         return !operator==(rhs);
     }
 
-    static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) {
+    static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) {
+        using Tegra::Shader::TextureType;
         SamplerDescriptor result;
-        switch (tic_texture_type) {
+
+        // This is going to be used to determine the shading language type.
+        // Because of that we don't care about all component types on color textures.
+        result.component_type.Assign(tic.r_type.Value());
+
+        switch (tic.texture_type.Value()) {
         case Tegra::Texture::TextureType::Texture1D:
-            result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
-            result.is_array.Assign(0);
-            result.is_buffer.Assign(0);
-            result.is_shadow.Assign(0);
+            result.texture_type.Assign(TextureType::Texture1D);
             return result;
         case Tegra::Texture::TextureType::Texture2D:
-            result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
-            result.is_array.Assign(0);
-            result.is_buffer.Assign(0);
-            result.is_shadow.Assign(0);
+            result.texture_type.Assign(TextureType::Texture2D);
             return result;
         case Tegra::Texture::TextureType::Texture3D:
-            result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D);
-            result.is_array.Assign(0);
-            result.is_buffer.Assign(0);
-            result.is_shadow.Assign(0);
+            result.texture_type.Assign(TextureType::Texture3D);
             return result;
         case Tegra::Texture::TextureType::TextureCubemap:
-            result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube);
-            result.is_array.Assign(0);
-            result.is_buffer.Assign(0);
-            result.is_shadow.Assign(0);
+            result.texture_type.Assign(TextureType::TextureCube);
             return result;
         case Tegra::Texture::TextureType::Texture1DArray:
-            result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
+            result.texture_type.Assign(TextureType::Texture1D);
             result.is_array.Assign(1);
-            result.is_buffer.Assign(0);
-            result.is_shadow.Assign(0);
             return result;
         case Tegra::Texture::TextureType::Texture2DArray:
-            result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
+            result.texture_type.Assign(TextureType::Texture2D);
             result.is_array.Assign(1);
-            result.is_buffer.Assign(0);
-            result.is_shadow.Assign(0);
             return result;
         case Tegra::Texture::TextureType::Texture1DBuffer:
-            result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
-            result.is_array.Assign(0);
+            result.texture_type.Assign(TextureType::Texture1D);
             result.is_buffer.Assign(1);
-            result.is_shadow.Assign(0);
             return result;
         case Tegra::Texture::TextureType::Texture2DNoMipmap:
-            result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
-            result.is_array.Assign(0);
-            result.is_buffer.Assign(0);
-            result.is_shadow.Assign(0);
+            result.texture_type.Assign(TextureType::Texture2D);
             return result;
         case Tegra::Texture::TextureType::TextureCubeArray:
-            result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube);
+            result.texture_type.Assign(TextureType::TextureCube);
             result.is_array.Assign(1);
-            result.is_buffer.Assign(0);
-            result.is_shadow.Assign(0);
             return result;
         default:
-            result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
-            result.is_array.Assign(0);
-            result.is_buffer.Assign(0);
-            result.is_shadow.Assign(0);
+            result.texture_type.Assign(TextureType::Texture2D);
             return result;
         }
     }
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index ae52afa79..368c75a66 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -89,7 +89,7 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
 
     const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
     const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
-    SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value());
+    SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
     result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
     return result;
 }
@@ -119,14 +119,6 @@ Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
     Texture::TICEntry tic_entry;
     memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
 
-    const auto r_type{tic_entry.r_type.Value()};
-    const auto g_type{tic_entry.g_type.Value()};
-    const auto b_type{tic_entry.b_type.Value()};
-    const auto a_type{tic_entry.a_type.Value()};
-
-    // TODO(Subv): Different data types for separate components are not supported
-    DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
-
     return tic_entry;
 }
 
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 89050361e..ce536e29b 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -638,7 +638,7 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
 
     const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
     const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
-    SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value());
+    SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
     result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
     return result;
 }
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 491cff370..8a9e9992e 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -67,6 +67,7 @@ public:
     static constexpr std::size_t NumVaryings = 31;
     static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number
     static constexpr std::size_t NumClipDistances = 8;
+    static constexpr std::size_t NumTransformFeedbackBuffers = 4;
     static constexpr std::size_t MaxShaderProgram = 6;
     static constexpr std::size_t MaxShaderStage = 5;
     // Maximum number of const buffers per shader stage.
@@ -524,6 +525,12 @@
            FractionalEven = 2,
        };
 
+        enum class PolygonMode : u32 {
+            Point = 0x1b00,
+            Line = 0x1b01,
+            Fill = 0x1b02,
+        };
+
        struct RenderTargetConfig {
            u32 address_high;
            u32 address_low;
@@ -621,6 +628,29 @@
            float depth_range_far;
        };
 
+        struct TransformFeedbackBinding {
+            u32 buffer_enable;
+            u32 address_high;
+            u32 address_low;
+            s32 buffer_size;
+            s32 buffer_offset;
+            INSERT_UNION_PADDING_WORDS(3);
+
+            GPUVAddr Address() const {
+                return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                             address_low);
+            }
+        };
+        static_assert(sizeof(TransformFeedbackBinding) == 32);
+
+        struct TransformFeedbackLayout {
+            u32 stream;
+            u32 varying_count;
+            u32 stride;
+            INSERT_UNION_PADDING_WORDS(1);
+        };
+        static_assert(sizeof(TransformFeedbackLayout) == 16);
+
        bool IsShaderConfigEnabled(std::size_t index) const {
            // The VertexB is always enabled.
            if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) {
@@ -629,6 +659,10 @@
                return shader_config[index].enable != 0;
            }
 
+        bool IsShaderConfigEnabled(Regs::ShaderProgram type) const {
+            return IsShaderConfigEnabled(static_cast<std::size_t>(type));
+        }
+
        union {
            struct {
                INSERT_UNION_PADDING_WORDS(0x45);
@@ -677,7 +711,13 @@
 
                u32 rasterize_enable;
 
-                INSERT_UNION_PADDING_WORDS(0xF1);
+                std::array<TransformFeedbackBinding, NumTransformFeedbackBuffers> tfb_bindings;
+
+                INSERT_UNION_PADDING_WORDS(0xC0);
+
+                std::array<TransformFeedbackLayout, NumTransformFeedbackBuffers> tfb_layouts;
+
+                INSERT_UNION_PADDING_WORDS(0x1);
 
                u32 tfb_enabled;
 
@@ -705,7 +745,12 @@
 
                s32 clear_stencil;
 
-                INSERT_UNION_PADDING_WORDS(0x7);
+                INSERT_UNION_PADDING_WORDS(0x2);
+
+                PolygonMode polygon_mode_front;
+                PolygonMode polygon_mode_back;
+
+                INSERT_UNION_PADDING_WORDS(0x3);
 
                u32 polygon_offset_point_enable;
                u32 polygon_offset_line_enable;
@@ -764,7 +809,11 @@
                    BitField<12, 4, u32> viewport;
                } clear_flags;
 
-                INSERT_UNION_PADDING_WORDS(0x19);
+                INSERT_UNION_PADDING_WORDS(0x10);
+
+                u32 fill_rectangle;
+
+                INSERT_UNION_PADDING_WORDS(0x8);
 
                std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
 
@@ -1187,7 +1236,11 @@
 
                u32 tex_cb_index;
 
-                INSERT_UNION_PADDING_WORDS(0x395);
+                INSERT_UNION_PADDING_WORDS(0x7D);
+
+                std::array<std::array<u8, 128>, NumTransformFeedbackBuffers> tfb_varying_locs;
+
+                INSERT_UNION_PADDING_WORDS(0x298);
 
                struct {
                    /// Compressed address of a buffer that holds information about bound SSBOs.
@@ -1413,6 +1466,8 @@ ASSERT_REG_POSITION(tess_mode, 0xC8);
 ASSERT_REG_POSITION(tess_level_outer, 0xC9);
 ASSERT_REG_POSITION(tess_level_inner, 0xCD);
 ASSERT_REG_POSITION(rasterize_enable, 0xDF);
+ASSERT_REG_POSITION(tfb_bindings, 0xE0);
+ASSERT_REG_POSITION(tfb_layouts, 0x1C0);
 ASSERT_REG_POSITION(tfb_enabled, 0x1D1);
 ASSERT_REG_POSITION(rt, 0x200);
 ASSERT_REG_POSITION(viewport_transform, 0x280);
@@ -1422,6 +1477,8 @@ ASSERT_REG_POSITION(depth_mode, 0x35F);
 ASSERT_REG_POSITION(clear_color[0], 0x360);
 ASSERT_REG_POSITION(clear_depth, 0x364);
 ASSERT_REG_POSITION(clear_stencil, 0x368);
+ASSERT_REG_POSITION(polygon_mode_front, 0x36B);
+ASSERT_REG_POSITION(polygon_mode_back, 0x36C);
 ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
 ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
 ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
@@ -1435,6 +1492,7 @@ ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
 ASSERT_REG_POSITION(depth_bounds, 0x3E7);
 ASSERT_REG_POSITION(zeta, 0x3F8);
 ASSERT_REG_POSITION(clear_flags, 0x43E);
+ASSERT_REG_POSITION(fill_rectangle, 0x44F);
 ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
 ASSERT_REG_POSITION(rt_control, 0x487);
 ASSERT_REG_POSITION(zeta_width, 0x48a);
@@ -1508,6 +1566,7 @@ ASSERT_REG_POSITION(firmware, 0x8C0);
 ASSERT_REG_POSITION(const_buffer, 0x8E0);
 ASSERT_REG_POSITION(cb_bind[0], 0x904);
 ASSERT_REG_POSITION(tex_cb_index, 0x982);
+ASSERT_REG_POSITION(tfb_varying_locs, 0xA00);
 ASSERT_REG_POSITION(ssbo_info, 0xD18);
 ASSERT_REG_POSITION(tex_info_buffers.address[0], 0xD2A);
 ASSERT_REG_POSITION(tex_info_buffers.size[0], 0xD2F);
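The padding words chosen around the new transform feedback registers can be cross-checked against the ASSERT_REG_POSITION values above (all offsets are in 32-bit words). A small stand-alone sketch of that arithmetic, using only numbers taken from this hunk; the names are local to the sketch, not the engine's:

#include <cstddef>

// Word offsets from the hunk above; one register word is 4 bytes.
constexpr std::size_t NumTransformFeedbackBuffers = 4;
constexpr std::size_t tfb_bindings_offset = 0xE0;
constexpr std::size_t binding_words = 32 / 4; // sizeof(TransformFeedbackBinding) == 32
constexpr std::size_t layout_words = 16 / 4;  // sizeof(TransformFeedbackLayout) == 16

// 0xE0 + 4 * 8 words = 0x100, plus 0xC0 padding words = 0x1C0.
constexpr std::size_t tfb_layouts_offset =
    tfb_bindings_offset + NumTransformFeedbackBuffers * binding_words + 0xC0;
// 0x1C0 + 4 * 4 words = 0x1D0, plus 1 padding word = 0x1D1.
constexpr std::size_t tfb_enabled_offset =
    tfb_layouts_offset + NumTransformFeedbackBuffers * layout_words + 0x1;

static_assert(tfb_layouts_offset == 0x1C0, "matches ASSERT_REG_POSITION(tfb_layouts, 0x1C0)");
static_assert(tfb_enabled_offset == 0x1D1, "matches ASSERT_REG_POSITION(tfb_enabled, 0x1D1)");

int main() {
    return 0;
}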
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c9bc83cd7..eba42deb4 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -911,14 +911,9 @@ union Instruction {
     } fadd32i;
 
     union {
-        BitField<20, 8, u64> shift_position;
-        BitField<28, 8, u64> shift_length;
-        BitField<48, 1, u64> negate_b;
-        BitField<49, 1, u64> negate_a;
-
-        u64 GetLeftShiftValue() const {
-            return 32 - (shift_position + shift_length);
-        }
+        BitField<40, 1, u64> brev;
+        BitField<47, 1, u64> rd_cc;
+        BitField<48, 1, u64> is_signed;
     } bfe;
 
     union {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index ba8c9d665..64acb17df 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -39,6 +39,7 @@ enum class RenderTargetFormat : u32 {
    RGBA32_FLOAT = 0xC0,
    RGBA32_UINT = 0xC2,
    RGBA16_UNORM = 0xC6,
+    RGBA16_SNORM = 0xC7,
    RGBA16_UINT = 0xC9,
    RGBA16_FLOAT = 0xCA,
    RG32_FLOAT = 0xCB,
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
index 6adef459e..f058f2744 100644
--- a/src/video_core/guest_driver.cpp
+++ b/src/video_core/guest_driver.cpp
@@ -4,13 +4,15 @@
 
 #include <algorithm>
 #include <limits>
+#include <vector>
 
+#include "common/common_types.h"
 #include "video_core/guest_driver.h"
 
 namespace VideoCore {
 
-void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) {
-    if (texture_handler_size_deduced) {
+void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) {
+    if (texture_handler_size) {
         return;
     }
     const std::size_t size = bound_offsets.size();
@@ -29,7 +31,6 @@ void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offse
     if (min_val > 2) {
         return;
     }
-    texture_handler_size_deduced = true;
     texture_handler_size = min_texture_handler_size * min_val;
 }
 
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
index fc1917347..99450777e 100644
--- a/src/video_core/guest_driver.h
+++ b/src/video_core/guest_driver.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <optional>
 #include <vector>
 
 #include "common/common_types.h"
@@ -17,25 +18,29 @@ namespace VideoCore {
  */
 class GuestDriverProfile {
 public:
-    void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets);
+    explicit GuestDriverProfile() = default;
+    explicit GuestDriverProfile(std::optional<u32> texture_handler_size)
+        : texture_handler_size{texture_handler_size} {}
+
+    void DeduceTextureHandlerSize(std::vector<u32> bound_offsets);
 
     u32 GetTextureHandlerSize() const {
-        return texture_handler_size;
+        return texture_handler_size.value_or(default_texture_handler_size);
     }
 
-    bool TextureHandlerSizeKnown() const {
-        return texture_handler_size_deduced;
+    bool IsTextureHandlerSizeKnown() const {
+        return texture_handler_size.has_value();
     }
 
 private:
     // Minimum size of texture handler any driver can use.
     static constexpr u32 min_texture_handler_size = 4;
-    // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily
-    // use 4 bytes instead. Thus, certain drivers may squish the size.
+
+    // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead.
+    // Thus, certain drivers may squish the size.
     static constexpr u32 default_texture_handler_size = 8;
 
-    u32 texture_handler_size = default_texture_handler_size;
-    bool texture_handler_size_deduced = false;
+    std::optional<u32> texture_handler_size = default_texture_handler_size;
 };
 
 } // namespace VideoCore
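A compact illustration of the std::optional bookkeeping introduced here: the getter falls back to a default whenever no size is stored, and the "known" query just checks presence. The sketch starts with the size unset so the fallback path is visible; names are illustrative rather than the exact video_core API.

#include <cstdint>
#include <iostream>
#include <optional>

class DriverProfileSketch {
public:
    std::uint32_t GetTextureHandlerSize() const {
        // Fall back to the conservative default when nothing has been deduced.
        return texture_handler_size.value_or(default_texture_handler_size);
    }

    bool IsTextureHandlerSizeKnown() const {
        return texture_handler_size.has_value();
    }

    void StoreDeducedSize(std::uint32_t size) {
        texture_handler_size = size;
    }

private:
    static constexpr std::uint32_t default_texture_handler_size = 8;
    std::optional<std::uint32_t> texture_handler_size;
};

int main() {
    DriverProfileSketch profile;
    std::cout << profile.IsTextureHandlerSizeKnown() << ' '
              << profile.GetTextureHandlerSize() << '\n'; // prints "0 8"
    profile.StoreDeducedSize(4);
    std::cout << profile.IsTextureHandlerSizeKnown() << ' '
              << profile.GetTextureHandlerSize() << '\n'; // prints "1 4"
    return 0;
}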
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index aea010087..073bdb491 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -174,7 +174,7 @@ private:
     /// End of address space, based on address space in bits.
     static constexpr GPUVAddr address_space_end{1ULL << address_space_width};
 
-    Common::PageTable page_table{page_bits};
+    Common::BackingPageTable page_table{page_bits};
     VMAMap vma_map;
     VideoCore::RasterizerInterface& rasterizer;
 
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index f2c83266e..6d522c318 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -51,6 +51,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
     MortonCopy<true, PixelFormat::R8UI>,
     MortonCopy<true, PixelFormat::RGBA16F>,
     MortonCopy<true, PixelFormat::RGBA16U>,
+    MortonCopy<true, PixelFormat::RGBA16S>,
     MortonCopy<true, PixelFormat::RGBA16UI>,
     MortonCopy<true, PixelFormat::R11FG11FB10F>,
     MortonCopy<true, PixelFormat::RGBA32UI>,
@@ -131,6 +132,7 @@ static constexpr ConversionArray linear_to_morton_fns = {
     MortonCopy<false, PixelFormat::R8U>,
     MortonCopy<false, PixelFormat::R8UI>,
     MortonCopy<false, PixelFormat::RGBA16F>,
+    MortonCopy<false, PixelFormat::RGBA16S>,
     MortonCopy<false, PixelFormat::RGBA16U>,
     MortonCopy<false, PixelFormat::RGBA16UI>,
     MortonCopy<false, PixelFormat::R11FG11FB10F>,
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 3e4514b94..1a68e3caa 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -25,7 +25,6 @@ constexpr std::size_t NumQueryTypes = 1;
 
 enum class LoadCallbackStage {
     Prepare,
-    Decompile,
     Build,
     Complete,
 };
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 4e4138573..063f41327 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -28,7 +28,6 @@
 #include "video_core/renderer_opengl/gl_query_cache.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_gen.h"
 #include "video_core/renderer_opengl/maxwell_to_gl.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
 
@@ -76,7 +75,7 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
 }
 
 std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
-                               const GLShader::ConstBufferEntry& entry) {
+                               const ConstBufferEntry& entry) {
     if (!entry.IsIndirect()) {
         return entry.GetSize();
     }
@@ -94,10 +93,6 @@ void oglEnable(GLenum cap, bool state) {
     (state ? glEnable : glDisable)(cap);
 }
 
-void oglEnablei(GLenum cap, bool state, GLuint index) {
-    (state ? glEnablei : glDisablei)(cap, index);
-}
-
 } // Anonymous namespace
 
 RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
@@ -272,9 +267,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
        SetupDrawTextures(stage, shader);
        SetupDrawImages(stage, shader);
 
-        const ProgramVariant variant(primitive_mode);
-        const auto program_handle = shader->GetHandle(variant);
-
+        const GLuint program_handle = shader->GetHandle();
        switch (program) {
        case Maxwell::ShaderProgram::VertexA:
        case Maxwell::ShaderProgram::VertexB:
@@ -295,7 +288,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
        // When a clip distance is enabled but not set in the shader it crops parts of the screen
        // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
        // clip distances only when it's written by a shader stage.
-        clip_distances |= shader->GetShaderEntries().clip_distances;
+        clip_distances |= shader->GetEntries().clip_distances;
 
        // When VertexA is enabled, we have dual vertex shaders
        if (program == Maxwell::ShaderProgram::VertexA) {
@@ -481,12 +474,12 @@ void RasterizerOpenGL::Clear() {
 void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
     MICROPROFILE_SCOPE(OpenGL_Drawing);
     auto& gpu = system.GPU().Maxwell3D();
-    const auto& regs = gpu.regs;
 
     query_cache.UpdateCounters();
 
     SyncViewport();
     SyncRasterizeEnable();
+    SyncPolygonModes();
     SyncColorMask();
     SyncFragmentColorClampState();
     SyncMultiSampleState();
@@ -498,7 +491,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
     SyncCullMode();
     SyncPrimitiveRestart();
     SyncScissorTest();
-    SyncTransformFeedback();
     SyncPointState();
     SyncPolygonOffset();
     SyncAlphaTest();
@@ -532,7 +524,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
     // Upload vertex and index data.
     SetupVertexBuffer();
     SetupVertexInstances();
-    GLintptr index_buffer_offset;
+    GLintptr index_buffer_offset = 0;
     if (is_indexed) {
         index_buffer_offset = SetupIndexBuffer();
     }
@@ -558,7 +550,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
     ConfigureFramebuffers();
 
     // Signal the buffer cache that we are not going to upload more things.
-    const bool invalidate = buffer_cache.Unmap();
+    buffer_cache.Unmap();
 
     // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL.
     vertex_array_pushbuffer.Bind();
@@ -571,7 +563,7 @@
        glTextureBarrier();
     }
 
-    ++num_queued_commands;
+    BeginTransformFeedback(primitive_mode);
 
     const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
     const GLsizei num_instances =
@@ -610,6 +602,10 @@
                                           num_instances, base_instance);
        }
     }
+
+    EndTransformFeedback();
+
+    ++num_queued_commands;
 }
 
 void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -622,12 +618,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
     auto kernel = shader_cache.GetComputeKernel(code_addr);
     SetupComputeTextures(kernel);
     SetupComputeImages(kernel);
-
-    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
-    const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
-                                 launch_desc.block_dim_z, launch_desc.shared_alloc,
-                                 launch_desc.local_pos_alloc);
-    program_manager.BindComputeShader(kernel->GetHandle(variant));
+    program_manager.BindComputeShader(kernel->GetHandle());
 
     const std::size_t buffer_size =
         Tegra::Engines::KeplerCompute::NumConstBuffers *
@@ -645,6 +636,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
     bind_ubo_pushbuffer.Bind();
     bind_ssbo_pushbuffer.Bind();
 
+    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
     glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
     ++num_queued_commands;
 }
@@ -749,7 +741,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
     const auto& shader_stage = stages[stage_index];
 
     u32 binding = device.GetBaseBindings(stage_index).uniform_buffer;
-    for (const auto& entry : shader->GetShaderEntries().const_buffers) {
+    for (const auto& entry : shader->GetEntries().const_buffers) {
        const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
        SetupConstBuffer(binding++, buffer, entry);
     }
@@ -760,7 +752,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
     const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
 
     u32 binding = 0;
-    for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
+    for (const auto& entry : kernel->GetEntries().const_buffers) {
        const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
        const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
        Tegra::Engines::ConstBufferInfo buffer;
@@ -772,7 +764,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
 }
 
 void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
-                                        const GLShader::ConstBufferEntry& entry) {
+                                        const ConstBufferEntry& entry) {
     if (!buffer.enabled) {
        // Set values to zero to unbind buffers
        bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
@@ -796,7 +788,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
     const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
 
     u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer;
-    for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
+    for (const auto& entry : shader->GetEntries().global_memory_entries) {
        const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
        const auto gpu_addr{memory_manager.Read<u64>(addr)};
        const auto size{memory_manager.Read<u32>(addr + 8)};
@@ -810,7 +802,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
     const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
 
     u32 binding = 0;
-    for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) {
+    for (const auto& entry : kernel->GetEntries().global_memory_entries) {
        const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
        const auto gpu_addr{memory_manager.Read<u64>(addr)};
        const auto size{memory_manager.Read<u32>(addr + 8)};
@@ -818,7 +810,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
     }
 }
 
-void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry,
+void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
                                          GPUVAddr gpu_addr, std::size_t size) {
     const auto alignment{device.GetShaderStorageBufferAlignment()};
     const auto [ssbo, buffer_offset] =
@@ -830,7 +822,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
     MICROPROFILE_SCOPE(OpenGL_Texture);
     const auto& maxwell3d = system.GPU().Maxwell3D();
     u32 binding = device.GetBaseBindings(stage_index).sampler;
-    for (const auto& entry : shader->GetShaderEntries().samplers) {
+    for (const auto& entry : shader->GetEntries().samplers) {
        const auto shader_type = static_cast<ShaderType>(stage_index);
        for (std::size_t i = 0; i < entry.Size(); ++i) {
            const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
@@ -843,7 +835,7 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
     MICROPROFILE_SCOPE(OpenGL_Texture);
     const auto& compute = system.GPU().KeplerCompute();
     u32 binding = 0;
-    for (const auto& entry : kernel->GetShaderEntries().samplers) {
+    for (const auto& entry : kernel->GetEntries().samplers) {
        for (std::size_t i = 0; i < entry.Size(); ++i) {
            const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i);
            SetupTexture(binding++, texture, entry);
@@ -852,7 +844,7 @@
 }
 
 void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
-                                    const GLShader::SamplerEntry& entry) {
+                                    const SamplerEntry& entry) {
     const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
     if (!view) {
        // Can occur when texture addr is null or its memory is unmapped/invalid
@@ -875,7 +867,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
 void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
     const auto& maxwell3d = system.GPU().Maxwell3D();
     u32 binding = device.GetBaseBindings(stage_index).image;
-    for (const auto& entry : shader->GetShaderEntries().images) {
+    for (const auto& entry : shader->GetEntries().images) {
        const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
        const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
        SetupImage(binding++, tic, entry);
@@ -885,14 +877,14 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
 void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
     const auto& compute = system.GPU().KeplerCompute();
     u32 binding = 0;
-    for (const auto& entry : shader->GetShaderEntries().images) {
+    for (const auto& entry : shader->GetEntries().images) {
        const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic;
        SetupImage(binding++, tic, entry);
     }
 }
 
 void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
-                                  const GLShader::ImageEntry& entry) {
+                                  const ImageEntry& entry) {
     const auto view = texture_cache.GetImageSurface(tic, entry);
     if (!view) {
        glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
@@ -1096,6 +1088,45 @@ void RasterizerOpenGL::SyncRasterizeEnable() {
     oglEnable(GL_RASTERIZER_DISCARD, gpu.regs.rasterize_enable == 0);
 }
 
+void RasterizerOpenGL::SyncPolygonModes() {
+    auto& gpu = system.GPU().Maxwell3D();
+    auto& flags = gpu.dirty.flags;
+    if (!flags[Dirty::PolygonModes]) {
+        return;
+    }
+    flags[Dirty::PolygonModes] = false;
+
+    if (gpu.regs.fill_rectangle) {
+        if (!GLAD_GL_NV_fill_rectangle) {
+            LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported");
+            glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+            return;
+        }
+
+        flags[Dirty::PolygonModeFront] = true;
+        flags[Dirty::PolygonModeBack] = true;
+        glPolygonMode(GL_FRONT_AND_BACK, GL_FILL_RECTANGLE_NV);
+        return;
+    }
+
+    if (gpu.regs.polygon_mode_front == gpu.regs.polygon_mode_back) {
+        flags[Dirty::PolygonModeFront] = false;
| 1114 | flags[Dirty::PolygonModeBack] = false; | ||
| 1115 | glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front)); | ||
| 1116 | return; | ||
| 1117 | } | ||
| 1118 | |||
| 1119 | if (flags[Dirty::PolygonModeFront]) { | ||
| 1120 | flags[Dirty::PolygonModeFront] = false; | ||
| 1121 | glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front)); | ||
| 1122 | } | ||
| 1123 | |||
| 1124 | if (flags[Dirty::PolygonModeBack]) { | ||
| 1125 | flags[Dirty::PolygonModeBack] = false; | ||
| 1126 | glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_back)); | ||
| 1127 | } | ||
| 1128 | } | ||
| 1129 | |||
| 1099 | void RasterizerOpenGL::SyncColorMask() { | 1130 | void RasterizerOpenGL::SyncColorMask() { |
| 1100 | auto& gpu = system.GPU().Maxwell3D(); | 1131 | auto& gpu = system.GPU().Maxwell3D(); |
| 1101 | auto& flags = gpu.dirty.flags; | 1132 | auto& flags = gpu.dirty.flags; |
| @@ -1257,11 +1288,6 @@ void RasterizerOpenGL::SyncScissorTest() { | |||
| 1257 | } | 1288 | } |
| 1258 | } | 1289 | } |
| 1259 | 1290 | ||
| 1260 | void RasterizerOpenGL::SyncTransformFeedback() { | ||
| 1261 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1262 | UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented"); | ||
| 1263 | } | ||
| 1264 | |||
| 1265 | void RasterizerOpenGL::SyncPointState() { | 1291 | void RasterizerOpenGL::SyncPointState() { |
| 1266 | auto& gpu = system.GPU().Maxwell3D(); | 1292 | auto& gpu = system.GPU().Maxwell3D(); |
| 1267 | auto& flags = gpu.dirty.flags; | 1293 | auto& flags = gpu.dirty.flags; |
| @@ -1337,4 +1363,62 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { | |||
| 1337 | oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); | 1363 | oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); |
| 1338 | } | 1364 | } |
| 1339 | 1365 | ||
| 1366 | void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { | ||
| 1367 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1368 | if (regs.tfb_enabled == 0) { | ||
| 1369 | return; | ||
| 1370 | } | ||
| 1371 | |||
| 1372 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | ||
| 1373 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | ||
| 1374 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); | ||
| 1375 | |||
| 1376 | for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { | ||
| 1377 | const auto& binding = regs.tfb_bindings[index]; | ||
| 1378 | if (!binding.buffer_enable) { | ||
| 1379 | if (enabled_transform_feedback_buffers[index]) { | ||
| 1380 | glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0, | ||
| 1381 | 0); | ||
| 1382 | } | ||
| 1383 | enabled_transform_feedback_buffers[index] = false; | ||
| 1384 | continue; | ||
| 1385 | } | ||
| 1386 | enabled_transform_feedback_buffers[index] = true; | ||
| 1387 | |||
| 1388 | auto& tfb_buffer = transform_feedback_buffers[index]; | ||
| 1389 | tfb_buffer.Create(); | ||
| 1390 | |||
| 1391 | const GLuint handle = tfb_buffer.handle; | ||
| 1392 | const std::size_t size = binding.buffer_size; | ||
| 1393 | glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY); | ||
| 1394 | glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0, | ||
| 1395 | static_cast<GLsizeiptr>(size)); | ||
| 1396 | } | ||
| 1397 | |||
| 1398 | glBeginTransformFeedback(GL_POINTS); | ||
| 1399 | } | ||
| 1400 | |||
| 1401 | void RasterizerOpenGL::EndTransformFeedback() { | ||
| 1402 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1403 | if (regs.tfb_enabled == 0) { | ||
| 1404 | return; | ||
| 1405 | } | ||
| 1406 | |||
| 1407 | glEndTransformFeedback(); | ||
| 1408 | |||
| 1409 | for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { | ||
| 1410 | const auto& binding = regs.tfb_bindings[index]; | ||
| 1411 | if (!binding.buffer_enable) { | ||
| 1412 | continue; | ||
| 1413 | } | ||
| 1414 | UNIMPLEMENTED_IF(binding.buffer_offset != 0); | ||
| 1415 | |||
| 1416 | const GLuint handle = transform_feedback_buffers[index].handle; | ||
| 1417 | const GPUVAddr gpu_addr = binding.Address(); | ||
| 1418 | const std::size_t size = binding.buffer_size; | ||
| 1419 | const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | ||
| 1420 | glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); | ||
| 1421 | } | ||
| 1422 | } | ||
| 1423 | |||
| 1340 | } // namespace OpenGL | 1424 | } // namespace OpenGL |
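
Illustrative sketch, not part of the diff: the new BeginTransformFeedback/EndTransformFeedback pair above relies on the standard OpenGL 4.5 capture sequence. A minimal standalone version of that call order follows; CaptureTransformFeedback, destination and buffer_size are hypothetical names, and the draw call itself is elided.

    #include <glad/glad.h>

    // Capture vertex output into a scratch buffer, then copy it to its
    // destination, mirroring the bind/begin/end/copy order used above.
    void CaptureTransformFeedback(GLuint destination, GLsizeiptr buffer_size) {
        GLuint scratch = 0;
        glCreateBuffers(1, &scratch);
        glNamedBufferData(scratch, buffer_size, nullptr, GL_STREAM_COPY);

        // Bind the scratch buffer to transform feedback binding point 0.
        glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, scratch, 0, buffer_size);

        glBeginTransformFeedback(GL_POINTS);
        // ... issue the draw call whose vertex output should be captured ...
        glEndTransformFeedback();

        // Move the captured bytes into the final destination buffer.
        glCopyNamedBufferSubData(scratch, destination, 0, 0, buffer_size);
        glDeleteBuffers(1, &scratch);
    }
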
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index b24c6661b..2d3be2437 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -98,7 +98,7 @@ private: | |||
| 98 | 98 | ||
| 99 | /// Configures a constant buffer. | 99 | /// Configures a constant buffer. |
| 100 | void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, | 100 | void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, |
| 101 | const GLShader::ConstBufferEntry& entry); | 101 | const ConstBufferEntry& entry); |
| 102 | 102 | ||
| 103 | /// Configures the current global memory entries to use for the draw command. | 103 | /// Configures the current global memory entries to use for the draw command. |
| 104 | void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); | 104 | void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); |
| @@ -107,7 +107,7 @@ private: | |||
| 107 | void SetupComputeGlobalMemory(const Shader& kernel); | 107 | void SetupComputeGlobalMemory(const Shader& kernel); |
| 108 | 108 | ||
| 109 | /// Configures a global memory buffer. | 109 | /// Configures a global memory buffer. |
| 110 | void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | 110 | void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, |
| 111 | std::size_t size); | 111 | std::size_t size); |
| 112 | 112 | ||
| 113 | /// Configures the current textures to use for the draw command. | 113 | /// Configures the current textures to use for the draw command. |
| @@ -118,7 +118,7 @@ private: | |||
| 118 | 118 | ||
| 119 | /// Configures a texture. | 119 | /// Configures a texture. |
| 120 | void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, | 120 | void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, |
| 121 | const GLShader::SamplerEntry& entry); | 121 | const SamplerEntry& entry); |
| 122 | 122 | ||
| 123 | /// Configures images in a graphics shader. | 123 | /// Configures images in a graphics shader. |
| 124 | void SetupDrawImages(std::size_t stage_index, const Shader& shader); | 124 | void SetupDrawImages(std::size_t stage_index, const Shader& shader); |
| @@ -127,8 +127,7 @@ private: | |||
| 127 | void SetupComputeImages(const Shader& shader); | 127 | void SetupComputeImages(const Shader& shader); |
| 128 | 128 | ||
| 129 | /// Configures an image. | 129 | /// Configures an image. |
| 130 | void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, | 130 | void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); |
| 131 | const GLShader::ImageEntry& entry); | ||
| 132 | 131 | ||
| 133 | /// Syncs the viewport and depth range to match the guest state | 132 | /// Syncs the viewport and depth range to match the guest state |
| 134 | void SyncViewport(); | 133 | void SyncViewport(); |
| @@ -169,15 +168,15 @@ private: | |||
| 169 | /// Syncs the scissor test state to match the guest state | 168 | /// Syncs the scissor test state to match the guest state |
| 170 | void SyncScissorTest(); | 169 | void SyncScissorTest(); |
| 171 | 170 | ||
| 172 | /// Syncs the transform feedback state to match the guest state | ||
| 173 | void SyncTransformFeedback(); | ||
| 174 | |||
| 175 | /// Syncs the point state to match the guest state | 171 | /// Syncs the point state to match the guest state |
| 176 | void SyncPointState(); | 172 | void SyncPointState(); |
| 177 | 173 | ||
| 178 | /// Syncs the rasterizer enable state to match the guest state | 174 | /// Syncs the rasterizer enable state to match the guest state |
| 179 | void SyncRasterizeEnable(); | 175 | void SyncRasterizeEnable(); |
| 180 | 176 | ||
| 177 | /// Syncs polygon modes to match the guest state | ||
| 178 | void SyncPolygonModes(); | ||
| 179 | |||
| 181 | /// Syncs Color Mask | 180 | /// Syncs Color Mask |
| 182 | void SyncColorMask(); | 181 | void SyncColorMask(); |
| 183 | 182 | ||
| @@ -190,6 +189,12 @@ private: | |||
| 190 | /// Syncs the framebuffer sRGB state to match the guest state | 189 | /// Syncs the framebuffer sRGB state to match the guest state |
| 191 | void SyncFramebufferSRGB(); | 190 | void SyncFramebufferSRGB(); |
| 192 | 191 | ||
| 192 | /// Begins transform feedback | ||
| 193 | void BeginTransformFeedback(GLenum primitive_mode); | ||
| 194 | |||
| 195 | /// Ends transform feedback | ||
| 196 | void EndTransformFeedback(); | ||
| 197 | |||
| 193 | /// Checks for extensions that are not strictly required but are needed for correct emulation | 198 | /// Checks for extensions that are not strictly required but are needed for correct emulation |
| 194 | void CheckExtensions(); | 199 | void CheckExtensions(); |
| 195 | 200 | ||
| @@ -227,6 +232,11 @@ private: | |||
| 227 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; | 232 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; |
| 228 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; | 233 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; |
| 229 | 234 | ||
| 235 | std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | ||
| 236 | transform_feedback_buffers; | ||
| 237 | std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | ||
| 238 | enabled_transform_feedback_buffers; | ||
| 239 | |||
| 230 | /// Number of commands queued to the OpenGL driver. Reset on flush. | 240 | /// Number of commands queued to the OpenGL driver. Reset on flush. |
| 231 | std::size_t num_queued_commands = 0; | 241 | std::size_t num_queued_commands = 0; |
| 232 | 242 | ||
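
Illustrative sketch, not part of the diff: SyncPolygonModes and its new declaration above follow the renderer's dirty-flag pattern, where a redundant GL call is skipped unless the tracked guest register changed since the last draw. A reduced standalone version of that pattern follows; StateTracker and the flag names are hypothetical stand-ins for the project's Dirty::* table.

    #include <bitset>
    #include <cstddef>

    // Hypothetical flag indices standing in for the tracker's dirty table.
    enum DirtyFlag : std::size_t { PolygonModeFront, PolygonModeBack, NumFlags };

    struct StateTracker {
        std::bitset<NumFlags> dirty;

        // Apply a piece of state only when its dirty bit is set, then clear it.
        template <typename ApplyFn>
        void Sync(DirtyFlag flag, ApplyFn&& apply) {
            if (!dirty[flag]) {
                return; // Guest state unchanged since the last draw; skip the GL call.
            }
            dirty[flag] = false;
            apply();
        }
    };

    // Usage: tracker.Sync(PolygonModeFront, [] { /* glPolygonMode(GL_FRONT, ...) */ });
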
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4cb89db8c..e3d31c3eb 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -2,12 +2,16 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <atomic> | ||
| 6 | #include <functional> | ||
| 5 | #include <mutex> | 7 | #include <mutex> |
| 6 | #include <optional> | 8 | #include <optional> |
| 7 | #include <string> | 9 | #include <string> |
| 8 | #include <thread> | 10 | #include <thread> |
| 9 | #include <unordered_set> | 11 | #include <unordered_set> |
| 12 | |||
| 10 | #include <boost/functional/hash.hpp> | 13 | #include <boost/functional/hash.hpp> |
| 14 | |||
| 11 | #include "common/alignment.h" | 15 | #include "common/alignment.h" |
| 12 | #include "common/assert.h" | 16 | #include "common/assert.h" |
| 13 | #include "common/logging/log.h" | 17 | #include "common/logging/log.h" |
| @@ -24,13 +28,14 @@ | |||
| 24 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | 28 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 29 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 26 | #include "video_core/renderer_opengl/utils.h" | 30 | #include "video_core/renderer_opengl/utils.h" |
| 31 | #include "video_core/shader/registry.h" | ||
| 27 | #include "video_core/shader/shader_ir.h" | 32 | #include "video_core/shader/shader_ir.h" |
| 28 | 33 | ||
| 29 | namespace OpenGL { | 34 | namespace OpenGL { |
| 30 | 35 | ||
| 31 | using Tegra::Engines::ShaderType; | 36 | using Tegra::Engines::ShaderType; |
| 32 | using VideoCommon::Shader::ConstBufferLocker; | ||
| 33 | using VideoCommon::Shader::ProgramCode; | 37 | using VideoCommon::Shader::ProgramCode; |
| 38 | using VideoCommon::Shader::Registry; | ||
| 34 | using VideoCommon::Shader::ShaderIR; | 39 | using VideoCommon::Shader::ShaderIR; |
| 35 | 40 | ||
| 36 | namespace { | 41 | namespace { |
| @@ -56,7 +61,7 @@ constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { | |||
| 56 | } | 61 | } |
| 57 | 62 | ||
| 58 | /// Calculates the size of a program stream | 63 | /// Calculates the size of a program stream |
| 59 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | 64 | std::size_t CalculateProgramSize(const ProgramCode& program) { |
| 60 | constexpr std::size_t start_offset = 10; | 65 | constexpr std::size_t start_offset = 10; |
| 61 | // This is the encoded version of BRA that jumps to itself. All Nvidia | 66 | // This is the encoded version of BRA that jumps to itself. All Nvidia |
| 62 | // shaders end with one. | 67 | // shaders end with one. |
| @@ -109,32 +114,9 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) { | |||
| 109 | } | 114 | } |
| 110 | } | 115 | } |
| 111 | 116 | ||
| 112 | /// Describes primitive behavior on geometry shaders | ||
| 113 | constexpr std::pair<const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { | ||
| 114 | switch (primitive_mode) { | ||
| 115 | case GL_POINTS: | ||
| 116 | return {"points", 1}; | ||
| 117 | case GL_LINES: | ||
| 118 | case GL_LINE_STRIP: | ||
| 119 | return {"lines", 2}; | ||
| 120 | case GL_LINES_ADJACENCY: | ||
| 121 | case GL_LINE_STRIP_ADJACENCY: | ||
| 122 | return {"lines_adjacency", 4}; | ||
| 123 | case GL_TRIANGLES: | ||
| 124 | case GL_TRIANGLE_STRIP: | ||
| 125 | case GL_TRIANGLE_FAN: | ||
| 126 | return {"triangles", 3}; | ||
| 127 | case GL_TRIANGLES_ADJACENCY: | ||
| 128 | case GL_TRIANGLE_STRIP_ADJACENCY: | ||
| 129 | return {"triangles_adjacency", 6}; | ||
| 130 | default: | ||
| 131 | return {"points", 1}; | ||
| 132 | } | ||
| 133 | } | ||
| 134 | |||
| 135 | /// Hashes one (or two) program streams | 117 | /// Hashes one (or two) program streams |
| 136 | u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code, | 118 | u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code, |
| 137 | const ProgramCode& code_b) { | 119 | const ProgramCode& code_b = {}) { |
| 138 | u64 unique_identifier = boost::hash_value(code); | 120 | u64 unique_identifier = boost::hash_value(code); |
| 139 | if (is_a) { | 121 | if (is_a) { |
| 140 | // VertexA programs include two programs | 122 | // VertexA programs include two programs |
| @@ -143,24 +125,6 @@ u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& co | |||
| 143 | return unique_identifier; | 125 | return unique_identifier; |
| 144 | } | 126 | } |
| 145 | 127 | ||
| 146 | /// Creates an unspecialized program from code streams | ||
| 147 | std::string GenerateGLSL(const Device& device, ShaderType shader_type, const ShaderIR& ir, | ||
| 148 | const std::optional<ShaderIR>& ir_b) { | ||
| 149 | switch (shader_type) { | ||
| 150 | case ShaderType::Vertex: | ||
| 151 | return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr); | ||
| 152 | case ShaderType::Geometry: | ||
| 153 | return GLShader::GenerateGeometryShader(device, ir); | ||
| 154 | case ShaderType::Fragment: | ||
| 155 | return GLShader::GenerateFragmentShader(device, ir); | ||
| 156 | case ShaderType::Compute: | ||
| 157 | return GLShader::GenerateComputeShader(device, ir); | ||
| 158 | default: | ||
| 159 | UNIMPLEMENTED_MSG("Unimplemented shader_type={}", static_cast<u32>(shader_type)); | ||
| 160 | return {}; | ||
| 161 | } | ||
| 162 | } | ||
| 163 | |||
| 164 | constexpr const char* GetShaderTypeName(ShaderType shader_type) { | 128 | constexpr const char* GetShaderTypeName(ShaderType shader_type) { |
| 165 | switch (shader_type) { | 129 | switch (shader_type) { |
| 166 | case ShaderType::Vertex: | 130 | case ShaderType::Vertex: |
| @@ -196,102 +160,38 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { | |||
| 196 | return {}; | 160 | return {}; |
| 197 | } | 161 | } |
| 198 | 162 | ||
| 199 | std::string GetShaderId(u64 unique_identifier, ShaderType shader_type) { | 163 | std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { |
| 200 | return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); | 164 | return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); |
| 201 | } | 165 | } |
| 202 | 166 | ||
| 203 | Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(Core::System& system, | 167 | std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { |
| 204 | ShaderType shader_type) { | 168 | const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; |
| 205 | if (shader_type == ShaderType::Compute) { | 169 | const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, |
| 206 | return system.GPU().KeplerCompute(); | 170 | entry.graphics_info, entry.compute_info}; |
| 207 | } else { | 171 | const auto registry = std::make_shared<Registry>(entry.type, info); |
| 208 | return system.GPU().Maxwell3D(); | 172 | for (const auto& [address, value] : entry.keys) { |
| 209 | } | 173 | const auto [buffer, offset] = address; |
| 210 | } | 174 | registry->InsertKey(buffer, offset, value); |
| 211 | |||
| 212 | std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType shader_type) { | ||
| 213 | return std::make_unique<ConstBufferLocker>(shader_type, | ||
| 214 | GetConstBufferEngineInterface(system, shader_type)); | ||
| 215 | } | ||
| 216 | |||
| 217 | void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { | ||
| 218 | locker.SetBoundBuffer(usage.bound_buffer); | ||
| 219 | for (const auto& key : usage.keys) { | ||
| 220 | const auto [buffer, offset] = key.first; | ||
| 221 | locker.InsertKey(buffer, offset, key.second); | ||
| 222 | } | 175 | } |
| 223 | for (const auto& [offset, sampler] : usage.bound_samplers) { | 176 | for (const auto& [offset, sampler] : entry.bound_samplers) { |
| 224 | locker.InsertBoundSampler(offset, sampler); | 177 | registry->InsertBoundSampler(offset, sampler); |
| 225 | } | 178 | } |
| 226 | for (const auto& [key, sampler] : usage.bindless_samplers) { | 179 | for (const auto& [key, sampler] : entry.bindless_samplers) { |
| 227 | const auto [buffer, offset] = key; | 180 | const auto [buffer, offset] = key; |
| 228 | locker.InsertBindlessSampler(buffer, offset, sampler); | 181 | registry->InsertBindlessSampler(buffer, offset, sampler); |
| 229 | } | 182 | } |
| 183 | return registry; | ||
| 230 | } | 184 | } |
| 231 | 185 | ||
| 232 | CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderType shader_type, | 186 | std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type, |
| 233 | const ProgramCode& code, const ProgramCode& code_b, | 187 | u64 unique_identifier, const ShaderIR& ir, |
| 234 | ConstBufferLocker& locker, const ProgramVariant& variant, | 188 | const Registry& registry, bool hint_retrievable = false) { |
| 235 | bool hint_retrievable = false) { | 189 | const std::string shader_id = MakeShaderID(unique_identifier, shader_type); |
| 236 | LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, shader_type)); | 190 | LOG_INFO(Render_OpenGL, "{}", shader_id); |
| 237 | |||
| 238 | const bool is_compute = shader_type == ShaderType::Compute; | ||
| 239 | const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | ||
| 240 | const ShaderIR ir(code, main_offset, COMPILER_SETTINGS, locker); | ||
| 241 | std::optional<ShaderIR> ir_b; | ||
| 242 | if (!code_b.empty()) { | ||
| 243 | ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker); | ||
| 244 | } | ||
| 245 | |||
| 246 | std::string source = fmt::format(R"(// {} | ||
| 247 | #version 430 core | ||
| 248 | #extension GL_ARB_separate_shader_objects : enable | ||
| 249 | )", | ||
| 250 | GetShaderId(unique_identifier, shader_type)); | ||
| 251 | if (device.HasShaderBallot()) { | ||
| 252 | source += "#extension GL_ARB_shader_ballot : require\n"; | ||
| 253 | } | ||
| 254 | if (device.HasVertexViewportLayer()) { | ||
| 255 | source += "#extension GL_ARB_shader_viewport_layer_array : require\n"; | ||
| 256 | } | ||
| 257 | if (device.HasImageLoadFormatted()) { | ||
| 258 | source += "#extension GL_EXT_shader_image_load_formatted : require\n"; | ||
| 259 | } | ||
| 260 | if (device.HasWarpIntrinsics()) { | ||
| 261 | source += "#extension GL_NV_gpu_shader5 : require\n" | ||
| 262 | "#extension GL_NV_shader_thread_group : require\n" | ||
| 263 | "#extension GL_NV_shader_thread_shuffle : require\n"; | ||
| 264 | } | ||
| 265 | // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 operations) | ||
| 266 | // on places where we don't want to. | ||
| 267 | // Thanks to Ryujinx for finding this workaround. | ||
| 268 | source += "#pragma optionNV(fastmath off)\n"; | ||
| 269 | |||
| 270 | if (shader_type == ShaderType::Geometry) { | ||
| 271 | const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(variant.primitive_mode); | ||
| 272 | source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices); | ||
| 273 | source += fmt::format("layout ({}) in;\n", glsl_topology); | ||
| 274 | } | ||
| 275 | if (shader_type == ShaderType::Compute) { | ||
| 276 | if (variant.local_memory_size > 0) { | ||
| 277 | source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n", | ||
| 278 | Common::AlignUp(variant.local_memory_size, 4) / 4); | ||
| 279 | } | ||
| 280 | source += | ||
| 281 | fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n", | ||
| 282 | variant.block_x, variant.block_y, variant.block_z); | ||
| 283 | |||
| 284 | if (variant.shared_memory_size > 0) { | ||
| 285 | // shared_memory_size is described in number of words | ||
| 286 | source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size); | ||
| 287 | } | ||
| 288 | } | ||
| 289 | |||
| 290 | source += '\n'; | ||
| 291 | source += GenerateGLSL(device, shader_type, ir, ir_b); | ||
| 292 | 191 | ||
| 192 | const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); | ||
| 293 | OGLShader shader; | 193 | OGLShader shader; |
| 294 | shader.Create(source.c_str(), GetGLShaderType(shader_type)); | 194 | shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); |
| 295 | 195 | ||
| 296 | auto program = std::make_shared<OGLProgram>(); | 196 | auto program = std::make_shared<OGLProgram>(); |
| 297 | program->Create(true, hint_retrievable, shader.handle); | 197 | program->Create(true, hint_retrievable, shader.handle); |
| @@ -299,7 +199,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp | |||
| 299 | } | 199 | } |
| 300 | 200 | ||
| 301 | std::unordered_set<GLenum> GetSupportedFormats() { | 201 | std::unordered_set<GLenum> GetSupportedFormats() { |
| 302 | GLint num_formats{}; | 202 | GLint num_formats; |
| 303 | glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); | 203 | glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); |
| 304 | 204 | ||
| 305 | std::vector<GLint> formats(num_formats); | 205 | std::vector<GLint> formats(num_formats); |
| @@ -314,115 +214,82 @@ std::unordered_set<GLenum> GetSupportedFormats() { | |||
| 314 | 214 | ||
| 315 | } // Anonymous namespace | 215 | } // Anonymous namespace |
| 316 | 216 | ||
| 317 | CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type, | 217 | CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, |
| 318 | GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b) | 218 | std::shared_ptr<VideoCommon::Shader::Registry> registry, |
| 319 | : RasterizerCacheObject{params.host_ptr}, system{params.system}, | 219 | ShaderEntries entries, std::shared_ptr<OGLProgram> program) |
| 320 | disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, | 220 | : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)}, |
| 321 | unique_identifier{params.unique_identifier}, shader_type{shader_type}, | 221 | cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {} |
| 322 | entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} { | 222 | |
| 323 | if (!params.precompiled_variants) { | 223 | CachedShader::~CachedShader() = default; |
| 324 | return; | 224 | |
| 325 | } | 225 | GLuint CachedShader::GetHandle() const { |
| 326 | for (const auto& pair : *params.precompiled_variants) { | 226 | DEBUG_ASSERT(registry->IsConsistent()); |
| 327 | auto locker = MakeLocker(system, shader_type); | 227 | return program->handle; |
| 328 | const auto& usage = pair->first; | ||
| 329 | FillLocker(*locker, usage); | ||
| 330 | |||
| 331 | std::unique_ptr<LockerVariant>* locker_variant = nullptr; | ||
| 332 | const auto it = | ||
| 333 | std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) { | ||
| 334 | return variant->locker->HasEqualKeys(*locker); | ||
| 335 | }); | ||
| 336 | if (it == locker_variants.end()) { | ||
| 337 | locker_variant = &locker_variants.emplace_back(); | ||
| 338 | *locker_variant = std::make_unique<LockerVariant>(); | ||
| 339 | locker_variant->get()->locker = std::move(locker); | ||
| 340 | } else { | ||
| 341 | locker_variant = &*it; | ||
| 342 | } | ||
| 343 | locker_variant->get()->programs.emplace(usage.variant, pair->second); | ||
| 344 | } | ||
| 345 | } | 228 | } |
| 346 | 229 | ||
| 347 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | 230 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, |
| 348 | Maxwell::ShaderProgram program_type, ProgramCode code, | 231 | Maxwell::ShaderProgram program_type, ProgramCode code, |
| 349 | ProgramCode code_b) { | 232 | ProgramCode code_b) { |
| 350 | const auto shader_type = GetShaderType(program_type); | 233 | const auto shader_type = GetShaderType(program_type); |
| 351 | params.disk_cache.SaveRaw( | 234 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 352 | ShaderDiskCacheRaw(params.unique_identifier, shader_type, code, code_b)); | ||
| 353 | 235 | ||
| 354 | ConstBufferLocker locker(shader_type, params.system.GPU().Maxwell3D()); | 236 | auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D()); |
| 355 | const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); | 237 | const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); |
| 356 | // TODO(Rodrigo): Handle VertexA shaders | 238 | // TODO(Rodrigo): Handle VertexA shaders |
| 357 | // std::optional<ShaderIR> ir_b; | 239 | // std::optional<ShaderIR> ir_b; |
| 358 | // if (!code_b.empty()) { | 240 | // if (!code_b.empty()) { |
| 359 | // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); | 241 | // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); |
| 360 | // } | 242 | // } |
| 361 | return std::shared_ptr<CachedShader>(new CachedShader( | 243 | auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); |
| 362 | params, shader_type, GLShader::GetEntries(ir), std::move(code), std::move(code_b))); | 244 | |
| 245 | ShaderDiskCacheEntry entry; | ||
| 246 | entry.type = shader_type; | ||
| 247 | entry.code = std::move(code); | ||
| 248 | entry.code_b = std::move(code_b); | ||
| 249 | entry.unique_identifier = params.unique_identifier; | ||
| 250 | entry.bound_buffer = registry->GetBoundBuffer(); | ||
| 251 | entry.graphics_info = registry->GetGraphicsInfo(); | ||
| 252 | entry.keys = registry->GetKeys(); | ||
| 253 | entry.bound_samplers = registry->GetBoundSamplers(); | ||
| 254 | entry.bindless_samplers = registry->GetBindlessSamplers(); | ||
| 255 | params.disk_cache.SaveEntry(std::move(entry)); | ||
| 256 | |||
| 257 | return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, | ||
| 258 | size_in_bytes, std::move(registry), | ||
| 259 | MakeEntries(ir), std::move(program))); | ||
| 363 | } | 260 | } |
| 364 | 261 | ||
| 365 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { | 262 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { |
| 366 | params.disk_cache.SaveRaw( | 263 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 367 | ShaderDiskCacheRaw(params.unique_identifier, ShaderType::Compute, code)); | 264 | |
| 368 | 265 | auto& engine = params.system.GPU().KeplerCompute(); | |
| 369 | ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute, | 266 | auto registry = std::make_shared<Registry>(ShaderType::Compute, engine); |
| 370 | params.system.GPU().KeplerCompute()); | 267 | const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); |
| 371 | const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); | 268 | const u64 uid = params.unique_identifier; |
| 372 | return std::shared_ptr<CachedShader>(new CachedShader( | 269 | auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); |
| 373 | params, ShaderType::Compute, GLShader::GetEntries(ir), std::move(code), {})); | 270 | |
| 271 | ShaderDiskCacheEntry entry; | ||
| 272 | entry.type = ShaderType::Compute; | ||
| 273 | entry.code = std::move(code); | ||
| 274 | entry.unique_identifier = uid; | ||
| 275 | entry.bound_buffer = registry->GetBoundBuffer(); | ||
| 276 | entry.compute_info = registry->GetComputeInfo(); | ||
| 277 | entry.keys = registry->GetKeys(); | ||
| 278 | entry.bound_samplers = registry->GetBoundSamplers(); | ||
| 279 | entry.bindless_samplers = registry->GetBindlessSamplers(); | ||
| 280 | params.disk_cache.SaveEntry(std::move(entry)); | ||
| 281 | |||
| 282 | return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, | ||
| 283 | size_in_bytes, std::move(registry), | ||
| 284 | MakeEntries(ir), std::move(program))); | ||
| 374 | } | 285 | } |
| 375 | 286 | ||
| 376 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, | 287 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, |
| 377 | const UnspecializedShader& unspecialized) { | 288 | const PrecompiledShader& precompiled_shader, |
| 378 | return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.type, | 289 | std::size_t size_in_bytes) { |
| 379 | unspecialized.entries, unspecialized.code, | 290 | return std::shared_ptr<CachedShader>(new CachedShader( |
| 380 | unspecialized.code_b)); | 291 | params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry, |
| 381 | } | 292 | precompiled_shader.entries, precompiled_shader.program)); |
| 382 | |||
| 383 | GLuint CachedShader::GetHandle(const ProgramVariant& variant) { | ||
| 384 | EnsureValidLockerVariant(); | ||
| 385 | |||
| 386 | const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant); | ||
| 387 | auto& program = entry->second; | ||
| 388 | if (!is_cache_miss) { | ||
| 389 | return program->handle; | ||
| 390 | } | ||
| 391 | |||
| 392 | program = BuildShader(device, unique_identifier, shader_type, code, code_b, | ||
| 393 | *curr_locker_variant->locker, variant); | ||
| 394 | disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker)); | ||
| 395 | |||
| 396 | LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); | ||
| 397 | return program->handle; | ||
| 398 | } | ||
| 399 | |||
| 400 | bool CachedShader::EnsureValidLockerVariant() { | ||
| 401 | const auto previous_variant = curr_locker_variant; | ||
| 402 | if (curr_locker_variant && !curr_locker_variant->locker->IsConsistent()) { | ||
| 403 | curr_locker_variant = nullptr; | ||
| 404 | } | ||
| 405 | if (!curr_locker_variant) { | ||
| 406 | for (auto& variant : locker_variants) { | ||
| 407 | if (variant->locker->IsConsistent()) { | ||
| 408 | curr_locker_variant = variant.get(); | ||
| 409 | } | ||
| 410 | } | ||
| 411 | } | ||
| 412 | if (!curr_locker_variant) { | ||
| 413 | auto& new_variant = locker_variants.emplace_back(); | ||
| 414 | new_variant = std::make_unique<LockerVariant>(); | ||
| 415 | new_variant->locker = MakeLocker(system, shader_type); | ||
| 416 | curr_locker_variant = new_variant.get(); | ||
| 417 | } | ||
| 418 | return previous_variant == curr_locker_variant; | ||
| 419 | } | ||
| 420 | |||
| 421 | ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, | ||
| 422 | const ConstBufferLocker& locker) const { | ||
| 423 | return ShaderDiskCacheUsage{unique_identifier, variant, | ||
| 424 | locker.GetBoundBuffer(), locker.GetKeys(), | ||
| 425 | locker.GetBoundSamplers(), locker.GetBindlessSamplers()}; | ||
| 426 | } | 293 | } |
| 427 | 294 | ||
| 428 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, | 295 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, |
| @@ -432,16 +299,12 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& | |||
| 432 | 299 | ||
| 433 | void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | 300 | void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, |
| 434 | const VideoCore::DiskResourceLoadCallback& callback) { | 301 | const VideoCore::DiskResourceLoadCallback& callback) { |
| 435 | const auto transferable = disk_cache.LoadTransferable(); | 302 | const std::optional transferable = disk_cache.LoadTransferable(); |
| 436 | if (!transferable) { | 303 | if (!transferable) { |
| 437 | return; | 304 | return; |
| 438 | } | 305 | } |
| 439 | const auto [raws, shader_usages] = *transferable; | ||
| 440 | if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) { | ||
| 441 | return; | ||
| 442 | } | ||
| 443 | 306 | ||
| 444 | const auto dumps = disk_cache.LoadPrecompiled(); | 307 | const std::vector gl_cache = disk_cache.LoadPrecompiled(); |
| 445 | const auto supported_formats = GetSupportedFormats(); | 308 | const auto supported_formats = GetSupportedFormats(); |
| 446 | 309 | ||
| 447 | // Track if precompiled cache was altered during loading to know if we have to | 310 | // Track if precompiled cache was altered during loading to know if we have to |
| @@ -450,77 +313,82 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 450 | 313 | ||
| 451 | // Inform the frontend about shader build initialization | 314 | // Inform the frontend about shader build initialization |
| 452 | if (callback) { | 315 | if (callback) { |
| 453 | callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size()); | 316 | callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size()); |
| 454 | } | 317 | } |
| 455 | 318 | ||
| 456 | std::mutex mutex; | 319 | std::mutex mutex; |
| 457 | std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex | 320 | std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex |
| 458 | std::atomic_bool compilation_failed = false; | 321 | std::atomic_bool gl_cache_failed = false; |
| 459 | 322 | ||
| 460 | const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, | 323 | const auto find_precompiled = [&gl_cache](u64 id) { |
| 461 | std::size_t end, const std::vector<ShaderDiskCacheUsage>& shader_usages, | 324 | return std::find_if(gl_cache.begin(), gl_cache.end(), |
| 462 | const ShaderDumpsMap& dumps) { | 325 | [id](const auto& entry) { return entry.unique_identifier == id; }); |
| 326 | }; | ||
| 327 | |||
| 328 | const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, | ||
| 329 | std::size_t end) { | ||
| 463 | context->MakeCurrent(); | 330 | context->MakeCurrent(); |
| 464 | SCOPE_EXIT({ return context->DoneCurrent(); }); | 331 | SCOPE_EXIT({ return context->DoneCurrent(); }); |
| 465 | 332 | ||
| 466 | for (std::size_t i = begin; i < end; ++i) { | 333 | for (std::size_t i = begin; i < end; ++i) { |
| 467 | if (stop_loading || compilation_failed) { | 334 | if (stop_loading) { |
| 468 | return; | 335 | return; |
| 469 | } | 336 | } |
| 470 | const auto& usage{shader_usages[i]}; | 337 | const auto& entry = (*transferable)[i]; |
| 471 | const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; | 338 | const u64 uid = entry.unique_identifier; |
| 472 | const auto dump{dumps.find(usage)}; | 339 | const auto it = find_precompiled(uid); |
| 473 | 340 | const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; | |
| 474 | CachedProgram shader; | 341 | |
| 475 | if (dump != dumps.end()) { | 342 | const bool is_compute = entry.type == ShaderType::Compute; |
| 476 | // If the shader is dumped, attempt to load it with | 343 | const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; |
| 477 | shader = GeneratePrecompiledProgram(dump->second, supported_formats); | 344 | auto registry = MakeRegistry(entry); |
| 478 | if (!shader) { | 345 | const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); |
| 479 | compilation_failed = true; | 346 | |
| 480 | return; | 347 | std::shared_ptr<OGLProgram> program; |
| 348 | if (precompiled_entry) { | ||
| 349 | // If the shader is precompiled, attempt to load it from the cached program binary | ||
| 350 | program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); | ||
| 351 | if (!program) { | ||
| 352 | gl_cache_failed = true; | ||
| 481 | } | 353 | } |
| 482 | } | 354 | } |
| 483 | if (!shader) { | 355 | if (!program) { |
| 484 | auto locker{MakeLocker(system, unspecialized.type)}; | 356 | // Otherwise compile it from GLSL |
| 485 | FillLocker(*locker, usage); | 357 | program = BuildShader(device, entry.type, uid, ir, *registry, true); |
| 486 | |||
| 487 | shader = BuildShader(device, usage.unique_identifier, unspecialized.type, | ||
| 488 | unspecialized.code, unspecialized.code_b, *locker, | ||
| 489 | usage.variant, true); | ||
| 490 | } | 358 | } |
| 491 | 359 | ||
| 360 | PrecompiledShader shader; | ||
| 361 | shader.program = std::move(program); | ||
| 362 | shader.registry = std::move(registry); | ||
| 363 | shader.entries = MakeEntries(ir); | ||
| 364 | |||
| 492 | std::scoped_lock lock{mutex}; | 365 | std::scoped_lock lock{mutex}; |
| 493 | if (callback) { | 366 | if (callback) { |
| 494 | callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, | 367 | callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, |
| 495 | shader_usages.size()); | 368 | transferable->size()); |
| 496 | } | 369 | } |
| 497 | 370 | runtime_cache.emplace(entry.unique_identifier, std::move(shader)); | |
| 498 | precompiled_programs.emplace(usage, std::move(shader)); | ||
| 499 | |||
| 500 | // TODO(Rodrigo): Is there a better way to do this? | ||
| 501 | precompiled_variants[usage.unique_identifier].push_back( | ||
| 502 | precompiled_programs.find(usage)); | ||
| 503 | } | 371 | } |
| 504 | }; | 372 | }; |
| 505 | 373 | ||
| 506 | const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)}; | 374 | const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)}; |
| 507 | const std::size_t bucket_size{shader_usages.size() / num_workers}; | 375 | const std::size_t bucket_size{transferable->size() / num_workers}; |
| 508 | std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); | 376 | std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); |
| 509 | std::vector<std::thread> threads(num_workers); | 377 | std::vector<std::thread> threads(num_workers); |
| 510 | for (std::size_t i = 0; i < num_workers; ++i) { | 378 | for (std::size_t i = 0; i < num_workers; ++i) { |
| 511 | const bool is_last_worker = i + 1 == num_workers; | 379 | const bool is_last_worker = i + 1 == num_workers; |
| 512 | const std::size_t start{bucket_size * i}; | 380 | const std::size_t start{bucket_size * i}; |
| 513 | const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size}; | 381 | const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size}; |
| 514 | 382 | ||
| 515 | // On some platforms the shared context has to be created from the GUI thread | 383 | // On some platforms the shared context has to be created from the GUI thread |
| 516 | contexts[i] = emu_window.CreateSharedContext(); | 384 | contexts[i] = emu_window.CreateSharedContext(); |
| 517 | threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps); | 385 | threads[i] = std::thread(worker, contexts[i].get(), start, end); |
| 518 | } | 386 | } |
| 519 | for (auto& thread : threads) { | 387 | for (auto& thread : threads) { |
| 520 | thread.join(); | 388 | thread.join(); |
| 521 | } | 389 | } |
| 522 | 390 | ||
| 523 | if (compilation_failed) { | 391 | if (gl_cache_failed) { |
| 524 | // Invalidate the precompiled cache if a dumped shader was rejected by the driver | 392 | // Invalidate the precompiled cache if a dumped shader was rejected by the driver |
| 525 | disk_cache.InvalidatePrecompiled(); | 393 | disk_cache.InvalidatePrecompiled(); |
| 526 | precompiled_cache_altered = true; | 394 | precompiled_cache_altered = true; |
| @@ -533,11 +401,12 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 533 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw | 401 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw |
| 534 | // before precompiling them | 402 | // before precompiling them |
| 535 | 403 | ||
| 536 | for (std::size_t i = 0; i < shader_usages.size(); ++i) { | 404 | for (std::size_t i = 0; i < transferable->size(); ++i) { |
| 537 | const auto& usage{shader_usages[i]}; | 405 | const u64 id = (*transferable)[i].unique_identifier; |
| 538 | if (dumps.find(usage) == dumps.end()) { | 406 | const auto it = find_precompiled(id); |
| 539 | const auto& program{precompiled_programs.at(usage)}; | 407 | if (it == gl_cache.end()) { |
| 540 | disk_cache.SaveDump(usage, program->handle); | 408 | const GLuint program = runtime_cache.at(id).program->handle; |
| 409 | disk_cache.SavePrecompiled(id, program); | ||
| 541 | precompiled_cache_altered = true; | 410 | precompiled_cache_altered = true; |
| 542 | } | 411 | } |
| 543 | } | 412 | } |
| @@ -547,80 +416,29 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 547 | } | 416 | } |
| 548 | } | 417 | } |
| 549 | 418 | ||
| 550 | const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const { | 419 | std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram( |
| 551 | const auto it = precompiled_variants.find(unique_identifier); | 420 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, |
| 552 | return it == precompiled_variants.end() ? nullptr : &it->second; | 421 | const std::unordered_set<GLenum>& supported_formats) { |
| 553 | } | 422 | if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { |
| 554 | 423 | LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); | |
| 555 | CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( | ||
| 556 | const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) { | ||
| 557 | if (supported_formats.find(dump.binary_format) == supported_formats.end()) { | ||
| 558 | LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); | ||
| 559 | return {}; | 424 | return {}; |
| 560 | } | 425 | } |
| 561 | 426 | ||
| 562 | CachedProgram shader = std::make_shared<OGLProgram>(); | 427 | auto program = std::make_shared<OGLProgram>(); |
| 563 | shader->handle = glCreateProgram(); | 428 | program->handle = glCreateProgram(); |
| 564 | glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); | 429 | glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); |
| 565 | glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(), | 430 | glProgramBinary(program->handle, precompiled_entry.binary_format, |
| 566 | static_cast<GLsizei>(dump.binary.size())); | 431 | precompiled_entry.binary.data(), |
| 567 | 432 | static_cast<GLsizei>(precompiled_entry.binary.size())); | |
| 568 | GLint link_status{}; | 433 | |
| 569 | glGetProgramiv(shader->handle, GL_LINK_STATUS, &link_status); | 434 | GLint link_status; |
| 435 | glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status); | ||
| 570 | if (link_status == GL_FALSE) { | 436 | if (link_status == GL_FALSE) { |
| 571 | LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing"); | 437 | LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); |
| 572 | return {}; | 438 | return {}; |
| 573 | } | 439 | } |
| 574 | 440 | ||
| 575 | return shader; | 441 | return program; |
| 576 | } | ||
| 577 | |||
| 578 | bool ShaderCacheOpenGL::GenerateUnspecializedShaders( | ||
| 579 | const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, | ||
| 580 | const std::vector<ShaderDiskCacheRaw>& raws) { | ||
| 581 | if (callback) { | ||
| 582 | callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); | ||
| 583 | } | ||
| 584 | |||
| 585 | for (std::size_t i = 0; i < raws.size(); ++i) { | ||
| 586 | if (stop_loading) { | ||
| 587 | return false; | ||
| 588 | } | ||
| 589 | const auto& raw{raws[i]}; | ||
| 590 | const u64 unique_identifier{raw.GetUniqueIdentifier()}; | ||
| 591 | const u64 calculated_hash{ | ||
| 592 | GetUniqueIdentifier(raw.GetType(), raw.HasProgramA(), raw.GetCode(), raw.GetCodeB())}; | ||
| 593 | if (unique_identifier != calculated_hash) { | ||
| 594 | LOG_ERROR(Render_OpenGL, | ||
| 595 | "Invalid hash in entry={:016x} (obtained hash={:016x}) - " | ||
| 596 | "removing shader cache", | ||
| 597 | raw.GetUniqueIdentifier(), calculated_hash); | ||
| 598 | disk_cache.InvalidateTransferable(); | ||
| 599 | return false; | ||
| 600 | } | ||
| 601 | |||
| 602 | const u32 main_offset = | ||
| 603 | raw.GetType() == ShaderType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | ||
| 604 | ConstBufferLocker locker(raw.GetType()); | ||
| 605 | const ShaderIR ir(raw.GetCode(), main_offset, COMPILER_SETTINGS, locker); | ||
| 606 | // TODO(Rodrigo): Handle VertexA shaders | ||
| 607 | // std::optional<ShaderIR> ir_b; | ||
| 608 | // if (raw.HasProgramA()) { | ||
| 609 | // ir_b.emplace(raw.GetProgramCodeB(), main_offset); | ||
| 610 | // } | ||
| 611 | |||
| 612 | UnspecializedShader unspecialized; | ||
| 613 | unspecialized.entries = GLShader::GetEntries(ir); | ||
| 614 | unspecialized.type = raw.GetType(); | ||
| 615 | unspecialized.code = raw.GetCode(); | ||
| 616 | unspecialized.code_b = raw.GetCodeB(); | ||
| 617 | unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized); | ||
| 618 | |||
| 619 | if (callback) { | ||
| 620 | callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); | ||
| 621 | } | ||
| 622 | } | ||
| 623 | return true; | ||
| 624 | } | 442 | } |
| 625 | 443 | ||
| 626 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | 444 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { |
| @@ -648,17 +466,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 648 | 466 | ||
| 649 | const auto unique_identifier = GetUniqueIdentifier( | 467 | const auto unique_identifier = GetUniqueIdentifier( |
| 650 | GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); | 468 | GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); |
| 651 | const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); | ||
| 652 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; | 469 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; |
| 653 | const ShaderParameters params{system, disk_cache, precompiled_variants, device, | 470 | const ShaderParameters params{system, disk_cache, device, |
| 654 | cpu_addr, host_ptr, unique_identifier}; | 471 | cpu_addr, host_ptr, unique_identifier}; |
| 655 | 472 | ||
| 656 | const auto found = unspecialized_shaders.find(unique_identifier); | 473 | const auto found = runtime_cache.find(unique_identifier); |
| 657 | if (found == unspecialized_shaders.end()) { | 474 | if (found == runtime_cache.end()) { |
| 658 | shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), | 475 | shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), |
| 659 | std::move(code_b)); | 476 | std::move(code_b)); |
| 660 | } else { | 477 | } else { |
| 661 | shader = CachedShader::CreateFromCache(params, found->second); | 478 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 479 | shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes); | ||
| 662 | } | 480 | } |
| 663 | Register(shader); | 481 | Register(shader); |
| 664 | 482 | ||
| @@ -673,19 +491,19 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | |||
| 673 | return kernel; | 491 | return kernel; |
| 674 | } | 492 | } |
| 675 | 493 | ||
| 676 | // No kernel found - create a new one | 494 | // No kernel found, create a new one |
| 677 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; | 495 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; |
| 678 | const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code, {})}; | 496 | const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; |
| 679 | const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); | ||
| 680 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; | 497 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; |
| 681 | const ShaderParameters params{system, disk_cache, precompiled_variants, device, | 498 | const ShaderParameters params{system, disk_cache, device, |
| 682 | cpu_addr, host_ptr, unique_identifier}; | 499 | cpu_addr, host_ptr, unique_identifier}; |
| 683 | 500 | ||
| 684 | const auto found = unspecialized_shaders.find(unique_identifier); | 501 | const auto found = runtime_cache.find(unique_identifier); |
| 685 | if (found == unspecialized_shaders.end()) { | 502 | if (found == runtime_cache.end()) { |
| 686 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); | 503 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); |
| 687 | } else { | 504 | } else { |
| 688 | kernel = CachedShader::CreateFromCache(params, found->second); | 505 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 506 | kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes); | ||
| 689 | } | 507 | } |
| 690 | 508 | ||
| 691 | Register(kernel); | 509 | Register(kernel); |
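
Illustrative sketch, not part of the diff: LoadDiskCache above splits the transferable cache into contiguous buckets, one per worker thread, with the last worker absorbing the remainder left over by integer division. A standalone version of that partitioning follows; BuildInParallel and build_entry are hypothetical placeholders for the per-entry shader build, and the per-worker shared GL context used by the real loader is omitted.

    #include <cstddef>
    #include <thread>
    #include <vector>

    // Build num_entries items in parallel using the same bucket split as above.
    void BuildInParallel(std::size_t num_entries, void (*build_entry)(std::size_t)) {
        const std::size_t num_workers = std::thread::hardware_concurrency() + 1;
        const std::size_t bucket_size = num_entries / num_workers;

        std::vector<std::thread> threads;
        threads.reserve(num_workers);
        for (std::size_t i = 0; i < num_workers; ++i) {
            const bool is_last_worker = i + 1 == num_workers;
            const std::size_t begin = bucket_size * i;
            // The last worker also takes the remainder entries.
            const std::size_t end = is_last_worker ? num_entries : begin + bucket_size;
            threads.emplace_back([=] {
                for (std::size_t entry = begin; entry < end; ++entry) {
                    build_entry(entry);
                }
            });
        }
        for (auto& thread : threads) {
            thread.join();
        }
    }
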
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 7b1470db3..4935019fc 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 22 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 23 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 23 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 24 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | 24 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" |
| 25 | #include "video_core/shader/const_buffer_locker.h" | 25 | #include "video_core/shader/registry.h" |
| 26 | #include "video_core/shader/shader_ir.h" | 26 | #include "video_core/shader/shader_ir.h" |
| 27 | 27 | ||
| 28 | namespace Core { | 28 | namespace Core { |
| @@ -41,22 +41,17 @@ class RasterizerOpenGL; | |||
| 41 | struct UnspecializedShader; | 41 | struct UnspecializedShader; |
| 42 | 42 | ||
| 43 | using Shader = std::shared_ptr<CachedShader>; | 43 | using Shader = std::shared_ptr<CachedShader>; |
| 44 | using CachedProgram = std::shared_ptr<OGLProgram>; | ||
| 45 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 44 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 46 | using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; | 45 | |
| 47 | using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>; | 46 | struct PrecompiledShader { |
| 48 | 47 | std::shared_ptr<OGLProgram> program; | |
| 49 | struct UnspecializedShader { | 48 | std::shared_ptr<VideoCommon::Shader::Registry> registry; |
| 50 | GLShader::ShaderEntries entries; | 49 | ShaderEntries entries; |
| 51 | Tegra::Engines::ShaderType type; | ||
| 52 | ProgramCode code; | ||
| 53 | ProgramCode code_b; | ||
| 54 | }; | 50 | }; |
| 55 | 51 | ||
| 56 | struct ShaderParameters { | 52 | struct ShaderParameters { |
| 57 | Core::System& system; | 53 | Core::System& system; |
| 58 | ShaderDiskCacheOpenGL& disk_cache; | 54 | ShaderDiskCacheOpenGL& disk_cache; |
| 59 | const PrecompiledVariants* precompiled_variants; | ||
| 60 | const Device& device; | 55 | const Device& device; |
| 61 | VAddr cpu_addr; | 56 | VAddr cpu_addr; |
| 62 | u8* host_ptr; | 57 | u8* host_ptr; |
| @@ -65,61 +60,45 @@ struct ShaderParameters { | |||
| 65 | 60 | ||
| 66 | class CachedShader final : public RasterizerCacheObject { | 61 | class CachedShader final : public RasterizerCacheObject { |
| 67 | public: | 62 | public: |
| 68 | static Shader CreateStageFromMemory(const ShaderParameters& params, | 63 | ~CachedShader(); |
| 69 | Maxwell::ShaderProgram program_type, | ||
| 70 | ProgramCode program_code, ProgramCode program_code_b); | ||
| 71 | static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); | ||
| 72 | 64 | ||
| 73 | static Shader CreateFromCache(const ShaderParameters& params, | 65 | /// Gets the GL program handle for the shader |
| 74 | const UnspecializedShader& unspecialized); | 66 | GLuint GetHandle() const; |
| 75 | 67 | ||
| 68 | /// Returns the guest CPU address of the shader | ||
| 76 | VAddr GetCpuAddr() const override { | 69 | VAddr GetCpuAddr() const override { |
| 77 | return cpu_addr; | 70 | return cpu_addr; |
| 78 | } | 71 | } |
| 79 | 72 | ||
| 73 | /// Returns the size in bytes of the shader | ||
| 80 | std::size_t GetSizeInBytes() const override { | 74 | std::size_t GetSizeInBytes() const override { |
| 81 | return code.size() * sizeof(u64); | 75 | return size_in_bytes; |
| 82 | } | 76 | } |
| 83 | 77 | ||
| 84 | /// Gets the shader entries for the shader | 78 | /// Gets the shader entries for the shader |
| 85 | const GLShader::ShaderEntries& GetShaderEntries() const { | 79 | const ShaderEntries& GetEntries() const { |
| 86 | return entries; | 80 | return entries; |
| 87 | } | 81 | } |
| 88 | 82 | ||
| 89 | /// Gets the GL program handle for the shader | 83 | static Shader CreateStageFromMemory(const ShaderParameters& params, |
| 90 | GLuint GetHandle(const ProgramVariant& variant); | 84 | Maxwell::ShaderProgram program_type, |
| 91 | 85 | ProgramCode program_code, ProgramCode program_code_b); | |
| 92 | private: | 86 | static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); |
| 93 | struct LockerVariant { | ||
| 94 | std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker; | ||
| 95 | std::unordered_map<ProgramVariant, CachedProgram> programs; | ||
| 96 | }; | ||
| 97 | |||
| 98 | explicit CachedShader(const ShaderParameters& params, Tegra::Engines::ShaderType shader_type, | ||
| 99 | GLShader::ShaderEntries entries, ProgramCode program_code, | ||
| 100 | ProgramCode program_code_b); | ||
| 101 | |||
| 102 | bool EnsureValidLockerVariant(); | ||
| 103 | |||
| 104 | ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant, | ||
| 105 | const VideoCommon::Shader::ConstBufferLocker& locker) const; | ||
| 106 | |||
| 107 | Core::System& system; | ||
| 108 | ShaderDiskCacheOpenGL& disk_cache; | ||
| 109 | const Device& device; | ||
| 110 | |||
| 111 | VAddr cpu_addr{}; | ||
| 112 | |||
| 113 | u64 unique_identifier{}; | ||
| 114 | Tegra::Engines::ShaderType shader_type{}; | ||
| 115 | |||
| 116 | GLShader::ShaderEntries entries; | ||
| 117 | 87 | ||
| 118 | ProgramCode code; | 88 | static Shader CreateFromCache(const ShaderParameters& params, |
| 119 | ProgramCode code_b; | 89 | const PrecompiledShader& precompiled_shader, |
| 90 | std::size_t size_in_bytes); | ||
| 120 | 91 | ||
| 121 | LockerVariant* curr_locker_variant = nullptr; | 92 | private: |
| 122 | std::vector<std::unique_ptr<LockerVariant>> locker_variants; | 93 | explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, |
| 94 | std::shared_ptr<VideoCommon::Shader::Registry> registry, | ||
| 95 | ShaderEntries entries, std::shared_ptr<OGLProgram> program); | ||
| 96 | |||
| 97 | std::shared_ptr<VideoCommon::Shader::Registry> registry; | ||
| 98 | ShaderEntries entries; | ||
| 99 | VAddr cpu_addr = 0; | ||
| 100 | std::size_t size_in_bytes = 0; | ||
| 101 | std::shared_ptr<OGLProgram> program; | ||
| 123 | }; | 102 | }; |
| 124 | 103 | ||
| 125 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { | 104 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { |
| @@ -142,25 +121,15 @@ protected: | |||
| 142 | void FlushObjectInner(const Shader& object) override {} | 121 | void FlushObjectInner(const Shader& object) override {} |
| 143 | 122 | ||
| 144 | private: | 123 | private: |
| 145 | bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading, | 124 | std::shared_ptr<OGLProgram> GeneratePrecompiledProgram( |
| 146 | const VideoCore::DiskResourceLoadCallback& callback, | 125 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, |
| 147 | const std::vector<ShaderDiskCacheRaw>& raws); | 126 | const std::unordered_set<GLenum>& supported_formats); |
| 148 | |||
| 149 | CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, | ||
| 150 | const std::unordered_set<GLenum>& supported_formats); | ||
| 151 | |||
| 152 | const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const; | ||
| 153 | 127 | ||
| 154 | Core::System& system; | 128 | Core::System& system; |
| 155 | Core::Frontend::EmuWindow& emu_window; | 129 | Core::Frontend::EmuWindow& emu_window; |
| 156 | const Device& device; | 130 | const Device& device; |
| 157 | |||
| 158 | ShaderDiskCacheOpenGL disk_cache; | 131 | ShaderDiskCacheOpenGL disk_cache; |
| 159 | 132 | std::unordered_map<u64, PrecompiledShader> runtime_cache; | |
| 160 | PrecompiledPrograms precompiled_programs; | ||
| 161 | std::unordered_map<u64, PrecompiledVariants> precompiled_variants; | ||
| 162 | |||
| 163 | std::unordered_map<u64, UnspecializedShader> unspecialized_shaders; | ||
| 164 | 133 | ||
| 165 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | 134 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; |
| 166 | }; | 135 | }; |
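Taken together, the header now keeps one PrecompiledShader bundle (program, registry, entries) per unique identifier instead of the old unspecialized-shader map plus per-variant lockers. A hedged sketch of how the loader side could fill runtime_cache; the helper name and its parameters are illustrative, only the struct fields and the map type come from the header above:

    // Hypothetical helper: stores a fully built bundle in the runtime cache.
    // The registry takes the role of the removed ConstBufferLocker, carrying the
    // engine state (const-buffer keys, bound/bindless samplers) the shader was
    // specialized against.
    void StoreRuntimeEntry(std::unordered_map<u64, PrecompiledShader>& runtime_cache,
                           u64 unique_identifier, std::shared_ptr<OGLProgram> program,
                           std::shared_ptr<VideoCommon::Shader::Registry> registry,
                           ShaderEntries entries) {
        PrecompiledShader shader;
        shader.program = std::move(program);
        shader.registry = std::move(registry);
        shader.entries = std::move(entries);
        runtime_cache.emplace(unique_identifier, std::move(shader));
    }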
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 3a41ed30c..2c38f57fd 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -23,8 +23,9 @@ | |||
| 23 | #include "video_core/shader/ast.h" | 23 | #include "video_core/shader/ast.h" |
| 24 | #include "video_core/shader/node.h" | 24 | #include "video_core/shader/node.h" |
| 25 | #include "video_core/shader/shader_ir.h" | 25 | #include "video_core/shader/shader_ir.h" |
| 26 | #include "video_core/shader/transform_feedback.h" | ||
| 26 | 27 | ||
| 27 | namespace OpenGL::GLShader { | 28 | namespace OpenGL { |
| 28 | 29 | ||
| 29 | namespace { | 30 | namespace { |
| 30 | 31 | ||
| @@ -36,6 +37,8 @@ using Tegra::Shader::IpaInterpMode; | |||
| 36 | using Tegra::Shader::IpaMode; | 37 | using Tegra::Shader::IpaMode; |
| 37 | using Tegra::Shader::IpaSampleMode; | 38 | using Tegra::Shader::IpaSampleMode; |
| 38 | using Tegra::Shader::Register; | 39 | using Tegra::Shader::Register; |
| 40 | using VideoCommon::Shader::BuildTransformFeedback; | ||
| 41 | using VideoCommon::Shader::Registry; | ||
| 39 | 42 | ||
| 40 | using namespace std::string_literals; | 43 | using namespace std::string_literals; |
| 41 | using namespace VideoCommon::Shader; | 44 | using namespace VideoCommon::Shader; |
| @@ -48,6 +51,11 @@ class ExprDecompiler; | |||
| 48 | 51 | ||
| 49 | enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; | 52 | enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; |
| 50 | 53 | ||
| 54 | constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"}; | ||
| 55 | |||
| 56 | constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr"; | ||
| 57 | constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr"; | ||
| 58 | |||
| 51 | struct TextureOffset {}; | 59 | struct TextureOffset {}; |
| 52 | struct TextureDerivates {}; | 60 | struct TextureDerivates {}; |
| 53 | using TextureArgument = std::pair<Type, Node>; | 61 | using TextureArgument = std::pair<Type, Node>; |
| @@ -56,6 +64,25 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument> | |||
| 56 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = | 64 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = |
| 57 | static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); | 65 | static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); |
| 58 | 66 | ||
| 67 | constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt | ||
| 68 | #define ftou floatBitsToUint | ||
| 69 | #define itof intBitsToFloat | ||
| 70 | #define utof uintBitsToFloat | ||
| 71 | |||
| 72 | bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{ | ||
| 73 | bvec2 is_nan1 = isnan(pair1); | ||
| 74 | bvec2 is_nan2 = isnan(pair2); | ||
| 75 | return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); | ||
| 76 | }} | ||
| 77 | |||
| 78 | const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); | ||
| 79 | const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); | ||
| 80 | |||
| 81 | layout (std140, binding = {}) uniform vs_config {{ | ||
| 82 | float y_direction; | ||
| 83 | }}; | ||
| 84 | )"; | ||
| 85 | |||
| 59 | class ShaderWriter final { | 86 | class ShaderWriter final { |
| 60 | public: | 87 | public: |
| 61 | void AddExpression(std::string_view text) { | 88 | void AddExpression(std::string_view text) { |
| @@ -269,12 +296,41 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { | |||
| 269 | } | 296 | } |
| 270 | } | 297 | } |
| 271 | 298 | ||
| 299 | /// Describes primitive behavior on geometry shaders | ||
| 300 | std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) { | ||
| 301 | switch (topology) { | ||
| 302 | case Maxwell::PrimitiveTopology::Points: | ||
| 303 | return {"points", 1}; | ||
| 304 | case Maxwell::PrimitiveTopology::Lines: | ||
| 305 | case Maxwell::PrimitiveTopology::LineStrip: | ||
| 306 | return {"lines", 2}; | ||
| 307 | case Maxwell::PrimitiveTopology::LinesAdjacency: | ||
| 308 | case Maxwell::PrimitiveTopology::LineStripAdjacency: | ||
| 309 | return {"lines_adjacency", 4}; | ||
| 310 | case Maxwell::PrimitiveTopology::Triangles: | ||
| 311 | case Maxwell::PrimitiveTopology::TriangleStrip: | ||
| 312 | case Maxwell::PrimitiveTopology::TriangleFan: | ||
| 313 | return {"triangles", 3}; | ||
| 314 | case Maxwell::PrimitiveTopology::TrianglesAdjacency: | ||
| 315 | case Maxwell::PrimitiveTopology::TriangleStripAdjacency: | ||
| 316 | return {"triangles_adjacency", 6}; | ||
| 317 | default: | ||
| 318 | UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology)); | ||
| 319 | return {"points", 1}; | ||
| 320 | } | ||
| 321 | } | ||
| 322 | |||
| 272 | /// Generates code to use for a swizzle operation. | 323 | /// Generates code to use for a swizzle operation. |
| 273 | constexpr const char* GetSwizzle(u32 element) { | 324 | constexpr const char* GetSwizzle(std::size_t element) { |
| 274 | constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; | 325 | constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; |
| 275 | return swizzle.at(element); | 326 | return swizzle.at(element); |
| 276 | } | 327 | } |
| 277 | 328 | ||
| 329 | constexpr const char* GetColorSwizzle(std::size_t element) { | ||
| 330 | constexpr std::array swizzle = {".r", ".g", ".b", ".a"}; | ||
| 331 | return swizzle.at(element); | ||
| 332 | } | ||
| 333 | |||
| 278 | /// Translate topology | 334 | /// Translate topology |
| 279 | std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | 335 | std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { |
| 280 | switch (topology) { | 336 | switch (topology) { |
| @@ -337,15 +393,66 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
| 337 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | 393 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); |
| 338 | } | 394 | } |
| 339 | 395 | ||
| 340 | constexpr bool IsVertexShader(ShaderType stage) { | 396 | struct GenericVaryingDescription {
| 341 | return stage == ShaderType::Vertex; | 397 | std::string name; |
| 342 | } | 398 | u8 first_element = 0; |
| 399 | bool is_scalar = false; | ||
| 400 | }; | ||
| 343 | 401 | ||
| 344 | class GLSLDecompiler final { | 402 | class GLSLDecompiler final { |
| 345 | public: | 403 | public: |
| 346 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, | 404 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, |
| 347 | std::string suffix) | 405 | ShaderType stage, std::string_view identifier, std::string_view suffix) |
| 348 | : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} | 406 | : device{device}, ir{ir}, registry{registry}, stage{stage}, |
| 407 | identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} { | ||
| 408 | if (stage != ShaderType::Compute) { | ||
| 409 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); | ||
| 410 | } | ||
| 411 | } | ||
| 412 | |||
| 413 | void Decompile() { | ||
| 414 | DeclareHeader(); | ||
| 415 | DeclareVertex(); | ||
| 416 | DeclareGeometry(); | ||
| 417 | DeclareFragment(); | ||
| 418 | DeclareCompute(); | ||
| 419 | DeclareInputAttributes(); | ||
| 420 | DeclareOutputAttributes(); | ||
| 421 | DeclareImages(); | ||
| 422 | DeclareSamplers(); | ||
| 423 | DeclareGlobalMemory(); | ||
| 424 | DeclareConstantBuffers(); | ||
| 425 | DeclareLocalMemory(); | ||
| 426 | DeclareRegisters(); | ||
| 427 | DeclarePredicates(); | ||
| 428 | DeclareInternalFlags(); | ||
| 429 | DeclareCustomVariables(); | ||
| 430 | DeclarePhysicalAttributeReader(); | ||
| 431 | |||
| 432 | code.AddLine("void main() {{"); | ||
| 433 | ++code.scope; | ||
| 434 | |||
| 435 | if (stage == ShaderType::Vertex) { | ||
| 436 | code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); | ||
| 437 | } | ||
| 438 | |||
| 439 | if (ir.IsDecompiled()) { | ||
| 440 | DecompileAST(); | ||
| 441 | } else { | ||
| 442 | DecompileBranchMode(); | ||
| 443 | } | ||
| 444 | |||
| 445 | --code.scope; | ||
| 446 | code.AddLine("}}"); | ||
| 447 | } | ||
| 448 | |||
| 449 | std::string GetResult() { | ||
| 450 | return code.GetResult(); | ||
| 451 | } | ||
| 452 | |||
| 453 | private: | ||
| 454 | friend class ASTDecompiler; | ||
| 455 | friend class ExprDecompiler; | ||
| 349 | 456 | ||
| 350 | void DecompileBranchMode() { | 457 | void DecompileBranchMode() { |
| 351 | // VM's program counter | 458 | // VM's program counter |
| @@ -387,46 +494,40 @@ public: | |||
| 387 | 494 | ||
| 388 | void DecompileAST(); | 495 | void DecompileAST(); |
| 389 | 496 | ||
| 390 | void Decompile() { | 497 | void DeclareHeader() { |
| 391 | DeclareVertex(); | 498 | if (!identifier.empty()) { |
| 392 | DeclareGeometry(); | 499 | code.AddLine("// {}", identifier); |
| 393 | DeclareRegisters(); | 500 | } |
| 394 | DeclareCustomVariables(); | 501 | code.AddLine("#version 440 core"); |
| 395 | DeclarePredicates(); | 502 | code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); |
| 396 | DeclareLocalMemory(); | 503 | if (device.HasShaderBallot()) { |
| 397 | DeclareInternalFlags(); | 504 | code.AddLine("#extension GL_ARB_shader_ballot : require"); |
| 398 | DeclareInputAttributes(); | 505 | } |
| 399 | DeclareOutputAttributes(); | 506 | if (device.HasVertexViewportLayer()) { |
| 400 | DeclareConstantBuffers(); | 507 | code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require"); |
| 401 | DeclareGlobalMemory(); | ||
| 402 | DeclareSamplers(); | ||
| 403 | DeclareImages(); | ||
| 404 | DeclarePhysicalAttributeReader(); | ||
| 405 | |||
| 406 | code.AddLine("void execute_{}() {{", suffix); | ||
| 407 | ++code.scope; | ||
| 408 | |||
| 409 | if (ir.IsDecompiled()) { | ||
| 410 | DecompileAST(); | ||
| 411 | } else { | ||
| 412 | DecompileBranchMode(); | ||
| 413 | } | 508 | } |
| 509 | if (device.HasImageLoadFormatted()) { | ||
| 510 | code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); | ||
| 511 | } | ||
| 512 | if (device.HasWarpIntrinsics()) { | ||
| 513 | code.AddLine("#extension GL_NV_gpu_shader5 : require"); | ||
| 514 | code.AddLine("#extension GL_NV_shader_thread_group : require"); | ||
| 515 | code.AddLine("#extension GL_NV_shader_thread_shuffle : require"); | ||
| 516 | } | ||
| 517 | // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 | ||
| 518 | // operations) on places where we don't want to. | ||
| 519 | // Thanks to Ryujinx for finding this workaround. | ||
| 520 | code.AddLine("#pragma optionNV(fastmath off)"); | ||
| 414 | 521 | ||
| 415 | --code.scope; | 522 | code.AddNewLine(); |
| 416 | code.AddLine("}}"); | ||
| 417 | } | ||
| 418 | 523 | ||
| 419 | std::string GetResult() { | 524 | code.AddLine(CommonDeclarations, EmulationUniformBlockBinding); |
| 420 | return code.GetResult(); | ||
| 421 | } | 525 | } |
| 422 | 526 | ||
| 423 | private: | ||
| 424 | friend class ASTDecompiler; | ||
| 425 | friend class ExprDecompiler; | ||
| 426 | |||
| 427 | void DeclareVertex() { | 527 | void DeclareVertex() { |
| 428 | if (!IsVertexShader(stage)) | 528 | if (stage != ShaderType::Vertex) { |
| 429 | return; | 529 | return; |
| 530 | } | ||
| 430 | 531 | ||
| 431 | DeclareVertexRedeclarations(); | 532 | DeclareVertexRedeclarations(); |
| 432 | } | 533 | } |
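DeclareHeader() folds the preamble that gl_shader_gen.cpp used to prepend into the decompiler itself. On a device reporting none of the optional extensions, the generated header comes out roughly as sketched below; the identifier comment and the uniform binding index are placeholders, not values taken from this change:

    // Approximate output of DeclareHeader() with all optional extensions disabled.
    // "deadbeef..." stands in for the identifier comment and <N> for
    // EmulationUniformBlockBinding; neither value is specified in this diff.
    //
    //   // deadbeefdeadbeef
    //   #version 440 core
    //   #extension GL_ARB_separate_shader_objects : enable
    //   #pragma optionNV(fastmath off)
    //
    //   #define ftoi floatBitsToInt
    //   #define ftou floatBitsToUint
    //   #define itof intBitsToFloat
    //   #define utof uintBitsToFloat
    //
    //   bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) { ... }
    //
    //   layout (std140, binding = <N>) uniform vs_config {
    //       float y_direction;
    //   };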
| @@ -436,9 +537,15 @@ private: | |||
| 436 | return; | 537 | return; |
| 437 | } | 538 | } |
| 438 | 539 | ||
| 540 | const auto& info = registry.GetGraphicsInfo(); | ||
| 541 | const auto input_topology = info.primitive_topology; | ||
| 542 | const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology); | ||
| 543 | max_input_vertices = max_vertices; | ||
| 544 | code.AddLine("layout ({}) in;", glsl_topology); | ||
| 545 | |||
| 439 | const auto topology = GetTopologyName(header.common3.output_topology); | 546 | const auto topology = GetTopologyName(header.common3.output_topology); |
| 440 | const auto max_vertices = header.common4.max_output_vertices.Value(); | 547 | const auto max_output_vertices = header.common4.max_output_vertices.Value(); |
| 441 | code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices); | 548 | code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices); |
| 442 | code.AddNewLine(); | 549 | code.AddNewLine(); |
| 443 | 550 | ||
| 444 | code.AddLine("in gl_PerVertex {{"); | 551 | code.AddLine("in gl_PerVertex {{"); |
| @@ -450,11 +557,40 @@ private: | |||
| 450 | DeclareVertexRedeclarations(); | 557 | DeclareVertexRedeclarations(); |
| 451 | } | 558 | } |
| 452 | 559 | ||
| 560 | void DeclareFragment() { | ||
| 561 | if (stage != ShaderType::Fragment) { | ||
| 562 | return; | ||
| 563 | } | ||
| 564 | for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { | ||
| 565 | code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt); | ||
| 566 | } | ||
| 567 | } | ||
| 568 | |||
| 569 | void DeclareCompute() { | ||
| 570 | if (stage != ShaderType::Compute) { | ||
| 571 | return; | ||
| 572 | } | ||
| 573 | const auto& info = registry.GetComputeInfo(); | ||
| 574 | if (const u32 size = info.shared_memory_size_in_words; size > 0) { | ||
| 575 | code.AddLine("shared uint smem[{}];", size); | ||
| 576 | code.AddNewLine(); | ||
| 577 | } | ||
| 578 | code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", | ||
| 579 | info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); | ||
| 580 | code.AddNewLine(); | ||
| 581 | } | ||
| 582 | |||
| 453 | void DeclareVertexRedeclarations() { | 583 | void DeclareVertexRedeclarations() { |
| 454 | code.AddLine("out gl_PerVertex {{"); | 584 | code.AddLine("out gl_PerVertex {{"); |
| 455 | ++code.scope; | 585 | ++code.scope; |
| 456 | 586 | ||
| 457 | code.AddLine("vec4 gl_Position;"); | 587 | auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position); |
| 588 | if (!pos_xfb.empty()) { | ||
| 589 | pos_xfb = fmt::format("layout ({}) ", pos_xfb); | ||
| 590 | } | ||
| 591 | const char* pos_type = | ||
| 592 | FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1); | ||
| 593 | code.AddLine("{}{} gl_Position;", pos_xfb, pos_type); | ||
| 458 | 594 | ||
| 459 | for (const auto attribute : ir.GetOutputAttributes()) { | 595 | for (const auto attribute : ir.GetOutputAttributes()) { |
| 460 | if (attribute == Attribute::Index::ClipDistances0123 || | 596 | if (attribute == Attribute::Index::ClipDistances0123 || |
| @@ -463,14 +599,14 @@ private: | |||
| 463 | break; | 599 | break; |
| 464 | } | 600 | } |
| 465 | } | 601 | } |
| 466 | if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) { | 602 | if (stage != ShaderType::Vertex || device.HasVertexViewportLayer()) { |
| 467 | if (ir.UsesLayer()) { | 603 | if (ir.UsesLayer()) { |
| 468 | code.AddLine("int gl_Layer;"); | 604 | code.AddLine("int gl_Layer;"); |
| 469 | } | 605 | } |
| 470 | if (ir.UsesViewportIndex()) { | 606 | if (ir.UsesViewportIndex()) { |
| 471 | code.AddLine("int gl_ViewportIndex;"); | 607 | code.AddLine("int gl_ViewportIndex;"); |
| 472 | } | 608 | } |
| 473 | } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) && | 609 | } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex && |
| 474 | !device.HasVertexViewportLayer()) { | 610 | !device.HasVertexViewportLayer()) { |
| 475 | LOG_ERROR( | 611 | LOG_ERROR( |
| 476 | Render_OpenGL, | 612 | Render_OpenGL, |
| @@ -525,18 +661,16 @@ private: | |||
| 525 | } | 661 | } |
| 526 | 662 | ||
| 527 | void DeclareLocalMemory() { | 663 | void DeclareLocalMemory() { |
| 664 | u64 local_memory_size = 0; | ||
| 528 | if (stage == ShaderType::Compute) { | 665 | if (stage == ShaderType::Compute) { |
| 529 | code.AddLine("#ifdef LOCAL_MEMORY_SIZE"); | 666 | local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; |
| 530 | code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory()); | 667 | } else { |
| 531 | code.AddLine("#endif"); | 668 | local_memory_size = header.GetLocalMemorySize(); |
| 532 | return; | ||
| 533 | } | 669 | } |
| 534 | |||
| 535 | const u64 local_memory_size = header.GetLocalMemorySize(); | ||
| 536 | if (local_memory_size == 0) { | 670 | if (local_memory_size == 0) { |
| 537 | return; | 671 | return; |
| 538 | } | 672 | } |
| 539 | const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; | 673 | const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4; |
| 540 | code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); | 674 | code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); |
| 541 | code.AddNewLine(); | 675 | code.AddNewLine(); |
| 542 | } | 676 | } |
| @@ -589,7 +723,7 @@ private: | |||
| 589 | void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { | 723 | void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { |
| 590 | const u32 location{GetGenericAttributeIndex(index)}; | 724 | const u32 location{GetGenericAttributeIndex(index)}; |
| 591 | 725 | ||
| 592 | std::string name{GetInputAttribute(index)}; | 726 | std::string name{GetGenericInputAttribute(index)}; |
| 593 | if (stage == ShaderType::Geometry) { | 727 | if (stage == ShaderType::Geometry) { |
| 594 | name = "gs_" + name + "[]"; | 728 | name = "gs_" + name + "[]"; |
| 595 | } | 729 | } |
| @@ -626,9 +760,59 @@ private: | |||
| 626 | } | 760 | } |
| 627 | } | 761 | } |
| 628 | 762 | ||
| 763 | std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const { | ||
| 764 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 765 | const auto it = transform_feedback.find(location); | ||
| 766 | if (it == transform_feedback.end()) { | ||
| 767 | return {}; | ||
| 768 | } | ||
| 769 | return it->second.components; | ||
| 770 | } | ||
| 771 | |||
| 772 | std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const { | ||
| 773 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 774 | const auto it = transform_feedback.find(location); | ||
| 775 | if (it == transform_feedback.end()) { | ||
| 776 | return {}; | ||
| 777 | } | ||
| 778 | |||
| 779 | const VaryingTFB& tfb = it->second; | ||
| 780 | return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer, | ||
| 781 | tfb.offset, tfb.stride); | ||
| 782 | } | ||
| 783 | |||
| 629 | void DeclareOutputAttribute(Attribute::Index index) { | 784 | void DeclareOutputAttribute(Attribute::Index index) { |
| 630 | const u32 location{GetGenericAttributeIndex(index)}; | 785 | static constexpr std::string_view swizzle = "xyzw"; |
| 631 | code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index)); | 786 | u8 element = 0; |
| 787 | while (element < 4) { | ||
| 788 | auto xfb = GetTransformFeedbackDecoration(index, element); | ||
| 789 | if (!xfb.empty()) { | ||
| 790 | xfb = fmt::format(", {}", xfb); | ||
| 791 | } | ||
| 792 | const std::size_t remainder = 4 - element; | ||
| 793 | const std::size_t num_components = GetNumComponents(index, element).value_or(remainder); | ||
| 794 | const char* const type = FLOAT_TYPES.at(num_components - 1); | ||
| 795 | |||
| 796 | const u32 location = GetGenericAttributeIndex(index); | ||
| 797 | |||
| 798 | GenericVaryingDescription description; | ||
| 799 | description.first_element = static_cast<u8>(element); | ||
| 800 | description.is_scalar = num_components == 1; | ||
| 801 | description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME); | ||
| 802 | if (element != 0 || num_components != 4) { | ||
| 803 | const std::string_view name_swizzle = swizzle.substr(element, num_components); | ||
| 804 | description.name = fmt::format("{}_{}", description.name, name_swizzle); | ||
| 805 | } | ||
| 806 | for (std::size_t i = 0; i < num_components; ++i) { | ||
| 807 | const u8 offset = static_cast<u8>(location * 4 + element + i); | ||
| 808 | varying_description.insert({offset, description}); | ||
| 809 | } | ||
| 810 | |||
| 811 | code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element, | ||
| 812 | xfb, type, description.name); | ||
| 813 | |||
| 814 | element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); | ||
| 815 | } | ||
| 632 | } | 816 | } |
| 633 | 817 | ||
| 634 | void DeclareConstantBuffers() { | 818 | void DeclareConstantBuffers() { |
| @@ -925,7 +1109,8 @@ private: | |||
| 925 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games | 1109 | // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games |
| 926 | // set an 0x80000000 index for those and the shader fails to build. Find out why | 1110 | // set an 0x80000000 index for those and the shader fails to build. Find out why |
| 927 | // this happens and what's its intent. | 1111 | // this happens and what's its intent. |
| 928 | return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint()); | 1112 | return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(), |
| 1113 | max_input_vertices.value()); | ||
| 929 | } | 1114 | } |
| 930 | return std::string(name); | 1115 | return std::string(name); |
| 931 | }; | 1116 | }; |
| @@ -959,7 +1144,7 @@ private: | |||
| 959 | // TODO(Subv): Find out what the values are for the first two elements when inside a | 1144 | // TODO(Subv): Find out what the values are for the first two elements when inside a |
| 960 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval | 1145 | // vertex shader, and what's the value of the fourth element when inside a Tess Eval |
| 961 | // shader. | 1146 | // shader. |
| 962 | ASSERT(IsVertexShader(stage)); | 1147 | ASSERT(stage == ShaderType::Vertex); |
| 963 | switch (element) { | 1148 | switch (element) { |
| 964 | case 2: | 1149 | case 2: |
| 965 | // Config pack's first value is instance_id. | 1150 | // Config pack's first value is instance_id. |
| @@ -980,7 +1165,7 @@ private: | |||
| 980 | return {"0", Type::Int}; | 1165 | return {"0", Type::Int}; |
| 981 | default: | 1166 | default: |
| 982 | if (IsGenericAttribute(attribute)) { | 1167 | if (IsGenericAttribute(attribute)) { |
| 983 | return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element), | 1168 | return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element), |
| 984 | Type::Float}; | 1169 | Type::Float}; |
| 985 | } | 1170 | } |
| 986 | break; | 1171 | break; |
| @@ -1030,12 +1215,12 @@ private: | |||
| 1030 | UNIMPLEMENTED(); | 1215 | UNIMPLEMENTED(); |
| 1031 | return {}; | 1216 | return {}; |
| 1032 | case 1: | 1217 | case 1: |
| 1033 | if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { | 1218 | if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { |
| 1034 | return {}; | 1219 | return {}; |
| 1035 | } | 1220 | } |
| 1036 | return {{"gl_Layer", Type::Int}}; | 1221 | return {{"gl_Layer", Type::Int}}; |
| 1037 | case 2: | 1222 | case 2: |
| 1038 | if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { | 1223 | if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { |
| 1039 | return {}; | 1224 | return {}; |
| 1040 | } | 1225 | } |
| 1041 | return {{"gl_ViewportIndex", Type::Int}}; | 1226 | return {{"gl_ViewportIndex", Type::Int}}; |
| @@ -1049,8 +1234,7 @@ private: | |||
| 1049 | return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}}; | 1234 | return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}}; |
| 1050 | default: | 1235 | default: |
| 1051 | if (IsGenericAttribute(attribute)) { | 1236 | if (IsGenericAttribute(attribute)) { |
| 1052 | return { | 1237 | return {{GetGenericOutputAttribute(attribute, abuf->GetElement()), Type::Float}}; |
| 1053 | {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}}; | ||
| 1054 | } | 1238 | } |
| 1055 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); | 1239 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); |
| 1056 | return {}; | 1240 | return {}; |
| @@ -1822,16 +2006,19 @@ private: | |||
| 1822 | expr += GetSampler(meta->sampler); | 2006 | expr += GetSampler(meta->sampler); |
| 1823 | expr += ", "; | 2007 | expr += ", "; |
| 1824 | 2008 | ||
| 1825 | expr += constructors.at(operation.GetOperandsCount() - 1); | 2009 | expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1); |
| 1826 | expr += '('; | 2010 | expr += '('; |
| 1827 | for (std::size_t i = 0; i < count; ++i) { | 2011 | for (std::size_t i = 0; i < count; ++i) { |
| 1828 | expr += VisitOperand(operation, i).AsInt(); | 2012 | if (i > 0) { |
| 1829 | const std::size_t next = i + 1; | ||
| 1830 | if (next == count) | ||
| 1831 | expr += ')'; | ||
| 1832 | else if (next < count) | ||
| 1833 | expr += ", "; | 2013 | expr += ", "; |
| 2014 | } | ||
| 2015 | expr += VisitOperand(operation, i).AsInt(); | ||
| 2016 | } | ||
| 2017 | if (meta->array) { | ||
| 2018 | expr += ", "; | ||
| 2019 | expr += Visit(meta->array).AsInt(); | ||
| 1834 | } | 2020 | } |
| 2021 | expr += ')'; | ||
| 1835 | 2022 | ||
| 1836 | if (meta->lod && !meta->sampler.IsBuffer()) { | 2023 | if (meta->lod && !meta->sampler.IsBuffer()) { |
| 1837 | expr += ", "; | 2024 | expr += ", "; |
| @@ -1945,7 +2132,7 @@ private: | |||
| 1945 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. | 2132 | // TODO(Subv): Figure out how dual-source blending is configured in the Switch. |
| 1946 | for (u32 component = 0; component < 4; ++component) { | 2133 | for (u32 component = 0; component < 4; ++component) { |
| 1947 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { | 2134 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { |
| 1948 | code.AddLine("FragColor{}[{}] = {};", render_target, component, | 2135 | code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component), |
| 1949 | SafeGetRegister(current_reg).AsFloat()); | 2136 | SafeGetRegister(current_reg).AsFloat()); |
| 1950 | ++current_reg; | 2137 | ++current_reg; |
| 1951 | } | 2138 | } |
| @@ -2261,27 +2448,34 @@ private: | |||
| 2261 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 2448 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 2262 | 2449 | ||
| 2263 | std::string GetRegister(u32 index) const { | 2450 | std::string GetRegister(u32 index) const { |
| 2264 | return GetDeclarationWithSuffix(index, "gpr"); | 2451 | return AppendSuffix(index, "gpr"); |
| 2265 | } | 2452 | } |
| 2266 | 2453 | ||
| 2267 | std::string GetCustomVariable(u32 index) const { | 2454 | std::string GetCustomVariable(u32 index) const { |
| 2268 | return GetDeclarationWithSuffix(index, "custom_var"); | 2455 | return AppendSuffix(index, "custom_var"); |
| 2269 | } | 2456 | } |
| 2270 | 2457 | ||
| 2271 | std::string GetPredicate(Tegra::Shader::Pred pred) const { | 2458 | std::string GetPredicate(Tegra::Shader::Pred pred) const { |
| 2272 | return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); | 2459 | return AppendSuffix(static_cast<u32>(pred), "pred"); |
| 2273 | } | 2460 | } |
| 2274 | 2461 | ||
| 2275 | std::string GetInputAttribute(Attribute::Index attribute) const { | 2462 | std::string GetGenericInputAttribute(Attribute::Index attribute) const { |
| 2276 | return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr"); | 2463 | return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME); |
| 2277 | } | 2464 | } |
| 2278 | 2465 | ||
| 2279 | std::string GetOutputAttribute(Attribute::Index attribute) const { | 2466 | std::unordered_map<u8, GenericVaryingDescription> varying_description; |
| 2280 | return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr"); | 2467 | |
| 2468 | std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const { | ||
| 2469 | const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element); | ||
| 2470 | const auto& description = varying_description.at(offset); | ||
| 2471 | if (description.is_scalar) { | ||
| 2472 | return description.name; | ||
| 2473 | } | ||
| 2474 | return fmt::format("{}[{}]", description.name, element - description.first_element); | ||
| 2281 | } | 2475 | } |
| 2282 | 2476 | ||
| 2283 | std::string GetConstBuffer(u32 index) const { | 2477 | std::string GetConstBuffer(u32 index) const { |
| 2284 | return GetDeclarationWithSuffix(index, "cbuf"); | 2478 | return AppendSuffix(index, "cbuf"); |
| 2285 | } | 2479 | } |
| 2286 | 2480 | ||
| 2287 | std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { | 2481 | std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { |
| @@ -2294,11 +2488,15 @@ private: | |||
| 2294 | } | 2488 | } |
| 2295 | 2489 | ||
| 2296 | std::string GetConstBufferBlock(u32 index) const { | 2490 | std::string GetConstBufferBlock(u32 index) const { |
| 2297 | return GetDeclarationWithSuffix(index, "cbuf_block"); | 2491 | return AppendSuffix(index, "cbuf_block"); |
| 2298 | } | 2492 | } |
| 2299 | 2493 | ||
| 2300 | std::string GetLocalMemory() const { | 2494 | std::string GetLocalMemory() const { |
| 2301 | return "lmem_" + suffix; | 2495 | if (suffix.empty()) { |
| 2496 | return "lmem"; | ||
| 2497 | } else { | ||
| 2498 | return "lmem_" + std::string{suffix}; | ||
| 2499 | } | ||
| 2302 | } | 2500 | } |
| 2303 | 2501 | ||
| 2304 | std::string GetInternalFlag(InternalFlag flag) const { | 2502 | std::string GetInternalFlag(InternalFlag flag) const { |
| @@ -2307,23 +2505,31 @@ private: | |||
| 2307 | const auto index = static_cast<u32>(flag); | 2505 | const auto index = static_cast<u32>(flag); |
| 2308 | ASSERT(index < static_cast<u32>(InternalFlag::Amount)); | 2506 | ASSERT(index < static_cast<u32>(InternalFlag::Amount)); |
| 2309 | 2507 | ||
| 2310 | return fmt::format("{}_{}", InternalFlagNames[index], suffix); | 2508 | if (suffix.empty()) { |
| 2509 | return InternalFlagNames[index]; | ||
| 2510 | } else { | ||
| 2511 | return fmt::format("{}_{}", InternalFlagNames[index], suffix); | ||
| 2512 | } | ||
| 2311 | } | 2513 | } |
| 2312 | 2514 | ||
| 2313 | std::string GetSampler(const Sampler& sampler) const { | 2515 | std::string GetSampler(const Sampler& sampler) const { |
| 2314 | return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); | 2516 | return AppendSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); |
| 2315 | } | 2517 | } |
| 2316 | 2518 | ||
| 2317 | std::string GetImage(const Image& image) const { | 2519 | std::string GetImage(const Image& image) const { |
| 2318 | return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image"); | 2520 | return AppendSuffix(static_cast<u32>(image.GetIndex()), "image"); |
| 2319 | } | 2521 | } |
| 2320 | 2522 | ||
| 2321 | std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { | 2523 | std::string AppendSuffix(u32 index, std::string_view name) const { |
| 2322 | return fmt::format("{}_{}_{}", name, index, suffix); | 2524 | if (suffix.empty()) { |
| 2525 | return fmt::format("{}{}", name, index); | ||
| 2526 | } else { | ||
| 2527 | return fmt::format("{}{}_{}", name, index, suffix); | ||
| 2528 | } | ||
| 2323 | } | 2529 | } |
| 2324 | 2530 | ||
| 2325 | u32 GetNumPhysicalInputAttributes() const { | 2531 | u32 GetNumPhysicalInputAttributes() const { |
| 2326 | return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); | 2532 | return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); |
| 2327 | } | 2533 | } |
| 2328 | 2534 | ||
| 2329 | u32 GetNumPhysicalAttributes() const { | 2535 | u32 GetNumPhysicalAttributes() const { |
| @@ -2334,17 +2540,31 @@ private: | |||
| 2334 | return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); | 2540 | return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); |
| 2335 | } | 2541 | } |
| 2336 | 2542 | ||
| 2543 | bool IsRenderTargetEnabled(u32 render_target) const { | ||
| 2544 | for (u32 component = 0; component < 4; ++component) { | ||
| 2545 | if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { | ||
| 2546 | return true; | ||
| 2547 | } | ||
| 2548 | } | ||
| 2549 | return false; | ||
| 2550 | } | ||
| 2551 | |||
| 2337 | const Device& device; | 2552 | const Device& device; |
| 2338 | const ShaderIR& ir; | 2553 | const ShaderIR& ir; |
| 2554 | const Registry& registry; | ||
| 2339 | const ShaderType stage; | 2555 | const ShaderType stage; |
| 2340 | const std::string suffix; | 2556 | const std::string_view identifier; |
| 2557 | const std::string_view suffix; | ||
| 2341 | const Header header; | 2558 | const Header header; |
| 2559 | std::unordered_map<u8, VaryingTFB> transform_feedback; | ||
| 2342 | 2560 | ||
| 2343 | ShaderWriter code; | 2561 | ShaderWriter code; |
| 2562 | |||
| 2563 | std::optional<u32> max_input_vertices; | ||
| 2344 | }; | 2564 | }; |
| 2345 | 2565 | ||
| 2346 | std::string GetFlowVariable(u32 i) { | 2566 | std::string GetFlowVariable(u32 index) { |
| 2347 | return fmt::format("flow_var_{}", i); | 2567 | return fmt::format("flow_var{}", index); |
| 2348 | } | 2568 | } |
| 2349 | 2569 | ||
| 2350 | class ExprDecompiler { | 2570 | class ExprDecompiler { |
| @@ -2531,7 +2751,7 @@ void GLSLDecompiler::DecompileAST() { | |||
| 2531 | 2751 | ||
| 2532 | } // Anonymous namespace | 2752 | } // Anonymous namespace |
| 2533 | 2753 | ||
| 2534 | ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { | 2754 | ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { |
| 2535 | ShaderEntries entries; | 2755 | ShaderEntries entries; |
| 2536 | for (const auto& cbuf : ir.GetConstantBuffers()) { | 2756 | for (const auto& cbuf : ir.GetConstantBuffers()) { |
| 2537 | entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), | 2757 | entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), |
| @@ -2555,28 +2775,12 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 2555 | return entries; | 2775 | return entries; |
| 2556 | } | 2776 | } |
| 2557 | 2777 | ||
| 2558 | std::string GetCommonDeclarations() { | 2778 | std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry, |
| 2559 | return R"(#define ftoi floatBitsToInt | 2779 | ShaderType stage, std::string_view identifier, |
| 2560 | #define ftou floatBitsToUint | 2780 | std::string_view suffix) { |
| 2561 | #define itof intBitsToFloat | 2781 | GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix); |
| 2562 | #define utof uintBitsToFloat | ||
| 2563 | |||
| 2564 | bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) { | ||
| 2565 | bvec2 is_nan1 = isnan(pair1); | ||
| 2566 | bvec2 is_nan2 = isnan(pair2); | ||
| 2567 | return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); | ||
| 2568 | } | ||
| 2569 | |||
| 2570 | const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); | ||
| 2571 | const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); | ||
| 2572 | )"; | ||
| 2573 | } | ||
| 2574 | |||
| 2575 | std::string Decompile(const Device& device, const ShaderIR& ir, ShaderType stage, | ||
| 2576 | const std::string& suffix) { | ||
| 2577 | GLSLDecompiler decompiler(device, ir, stage, suffix); | ||
| 2578 | decompiler.Decompile(); | 2782 | decompiler.Decompile(); |
| 2579 | return decompiler.GetResult(); | 2783 | return decompiler.GetResult(); |
| 2580 | } | 2784 | } |
| 2581 | 2785 | ||
| 2582 | } // namespace OpenGL::GLShader | 2786 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 0f692c1db..e7dbd810c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -6,22 +6,18 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <string> | 8 | #include <string> |
| 9 | #include <string_view> | ||
| 9 | #include <utility> | 10 | #include <utility> |
| 10 | #include <vector> | 11 | #include <vector> |
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 12 | #include "video_core/engines/maxwell_3d.h" | 13 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/engines/shader_type.h" | 14 | #include "video_core/engines/shader_type.h" |
| 15 | #include "video_core/shader/registry.h" | ||
| 14 | #include "video_core/shader/shader_ir.h" | 16 | #include "video_core/shader/shader_ir.h" |
| 15 | 17 | ||
| 16 | namespace VideoCommon::Shader { | ||
| 17 | class ShaderIR; | ||
| 18 | } | ||
| 19 | |||
| 20 | namespace OpenGL { | 18 | namespace OpenGL { |
| 21 | class Device; | ||
| 22 | } | ||
| 23 | 19 | ||
| 24 | namespace OpenGL::GLShader { | 20 | class Device; |
| 25 | 21 | ||
| 26 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 22 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 27 | using SamplerEntry = VideoCommon::Shader::Sampler; | 23 | using SamplerEntry = VideoCommon::Shader::Sampler; |
| @@ -78,11 +74,11 @@ struct ShaderEntries { | |||
| 78 | std::size_t shader_length{}; | 74 | std::size_t shader_length{}; |
| 79 | }; | 75 | }; |
| 80 | 76 | ||
| 81 | ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir); | 77 | ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir); |
| 82 | |||
| 83 | std::string GetCommonDeclarations(); | ||
| 84 | 78 | ||
| 85 | std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | 79 | std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 86 | Tegra::Engines::ShaderType stage, const std::string& suffix); | 80 | const VideoCommon::Shader::Registry& registry, |
| 81 | Tegra::Engines::ShaderType stage, std::string_view identifier, | ||
| 82 | std::string_view suffix = {}); | ||
| 87 | 83 | ||
| 88 | } // namespace OpenGL::GLShader | 84 | } // namespace OpenGL |
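With the declarations above, producing GLSL is a single call per shader: the caller supplies the IR, the Registry holding the recorded engine state, the stage, and an identifier that only ends up in a header comment. A hedged sketch of a call site; device, ir and registry are assumed to have been built elsewhere (the real callers live in gl_shader_cache.cpp):

    // Usage sketch of the two entry points declared above; the suffix argument is
    // left at its default.
    const std::string glsl =
        OpenGL::DecompileShader(device, ir, registry, Tegra::Engines::ShaderType::Fragment,
                                "hex id used only for the header comment");
    const OpenGL::ShaderEntries entries = OpenGL::MakeEntries(ir);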
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 1fc204f6f..9e95a122b 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -31,32 +31,24 @@ namespace { | |||
| 31 | 31 | ||
| 32 | using ShaderCacheVersionHash = std::array<u8, 64>; | 32 | using ShaderCacheVersionHash = std::array<u8, 64>; |
| 33 | 33 | ||
| 34 | enum class TransferableEntryKind : u32 { | ||
| 35 | Raw, | ||
| 36 | Usage, | ||
| 37 | }; | ||
| 38 | |||
| 39 | struct ConstBufferKey { | 34 | struct ConstBufferKey { |
| 40 | u32 cbuf{}; | 35 | u32 cbuf = 0; |
| 41 | u32 offset{}; | 36 | u32 offset = 0; |
| 42 | u32 value{}; | 37 | u32 value = 0; |
| 43 | }; | 38 | }; |
| 44 | 39 | ||
| 45 | struct BoundSamplerKey { | 40 | struct BoundSamplerKey { |
| 46 | u32 offset{}; | 41 | u32 offset = 0; |
| 47 | Tegra::Engines::SamplerDescriptor sampler{}; | 42 | Tegra::Engines::SamplerDescriptor sampler; |
| 48 | }; | 43 | }; |
| 49 | 44 | ||
| 50 | struct BindlessSamplerKey { | 45 | struct BindlessSamplerKey { |
| 51 | u32 cbuf{}; | 46 | u32 cbuf = 0; |
| 52 | u32 offset{}; | 47 | u32 offset = 0; |
| 53 | Tegra::Engines::SamplerDescriptor sampler{}; | 48 | Tegra::Engines::SamplerDescriptor sampler; |
| 54 | }; | 49 | }; |
| 55 | 50 | ||
| 56 | constexpr u32 NativeVersion = 12; | 51 | constexpr u32 NativeVersion = 20; |
| 57 | |||
| 58 | // Making sure sizes doesn't change by accident | ||
| 59 | static_assert(sizeof(ProgramVariant) == 20); | ||
| 60 | 52 | ||
| 61 | ShaderCacheVersionHash GetShaderCacheVersionHash() { | 53 | ShaderCacheVersionHash GetShaderCacheVersionHash() { |
| 62 | ShaderCacheVersionHash hash{}; | 54 | ShaderCacheVersionHash hash{}; |
| @@ -67,61 +59,124 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { | |||
| 67 | 59 | ||
| 68 | } // Anonymous namespace | 60 | } // Anonymous namespace |
| 69 | 61 | ||
| 70 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ShaderType type, ProgramCode code, | 62 | ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default; |
| 71 | ProgramCode code_b) | ||
| 72 | : unique_identifier{unique_identifier}, type{type}, code{std::move(code)}, code_b{std::move( | ||
| 73 | code_b)} {} | ||
| 74 | 63 | ||
| 75 | ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; | 64 | ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default; |
| 76 | 65 | ||
| 77 | ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default; | 66 | bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { |
| 78 | 67 | if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) { | |
| 79 | bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) { | ||
| 80 | if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) || | ||
| 81 | file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) { | ||
| 82 | return false; | 68 | return false; |
| 83 | } | 69 | } |
| 84 | u32 code_size{}; | 70 | u32 code_size; |
| 85 | u32 code_size_b{}; | 71 | u32 code_size_b; |
| 86 | if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) || | 72 | if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) || |
| 87 | file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) { | 73 | file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) { |
| 88 | return false; | 74 | return false; |
| 89 | } | 75 | } |
| 90 | |||
| 91 | code.resize(code_size); | 76 | code.resize(code_size); |
| 92 | code_b.resize(code_size_b); | 77 | code_b.resize(code_size_b); |
| 93 | 78 | ||
| 94 | if (file.ReadArray(code.data(), code_size) != code_size) | 79 | if (file.ReadArray(code.data(), code_size) != code_size) { |
| 95 | return false; | 80 | return false; |
| 96 | 81 | } | |
| 97 | if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) { | 82 | if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) { |
| 98 | return false; | 83 | return false; |
| 99 | } | 84 | } |
| 85 | |||
| 86 | u8 is_texture_handler_size_known; | ||
| 87 | u32 texture_handler_size_value; | ||
| 88 | u32 num_keys; | ||
| 89 | u32 num_bound_samplers; | ||
| 90 | u32 num_bindless_samplers; | ||
| 91 | if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || | ||
| 92 | file.ReadArray(&is_texture_handler_size_known, 1) != 1 || | ||
| 93 | file.ReadArray(&texture_handler_size_value, 1) != 1 || | ||
| 94 | file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || | ||
| 95 | file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || | ||
| 96 | file.ReadArray(&num_bindless_samplers, 1) != 1) { | ||
| 97 | return false; | ||
| 98 | } | ||
| 99 | if (is_texture_handler_size_known) { | ||
| 100 | texture_handler_size = texture_handler_size_value; | ||
| 101 | } | ||
| 102 | |||
| 103 | std::vector<ConstBufferKey> flat_keys(num_keys); | ||
| 104 | std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers); | ||
| 105 | std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers); | ||
| 106 | if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() || | ||
| 107 | file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) != | ||
| 108 | flat_bound_samplers.size() || | ||
| 109 | file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) != | ||
| 110 | flat_bindless_samplers.size()) { | ||
| 111 | return false; | ||
| 112 | } | ||
| 113 | for (const auto& key : flat_keys) { | ||
| 114 | keys.insert({{key.cbuf, key.offset}, key.value}); | ||
| 115 | } | ||
| 116 | for (const auto& key : flat_bound_samplers) { | ||
| 117 | bound_samplers.emplace(key.offset, key.sampler); | ||
| 118 | } | ||
| 119 | for (const auto& key : flat_bindless_samplers) { | ||
| 120 | bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); | ||
| 121 | } | ||
| 122 | |||
| 100 | return true; | 123 | return true; |
| 101 | } | 124 | } |
| 102 | 125 | ||
| 103 | bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { | 126 | bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { |
| 104 | if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast<u32>(type)) != 1 || | 127 | if (file.WriteObject(static_cast<u32>(type)) != 1 || |
| 105 | file.WriteObject(static_cast<u32>(code.size())) != 1 || | 128 | file.WriteObject(static_cast<u32>(code.size())) != 1 || |
| 106 | file.WriteObject(static_cast<u32>(code_b.size())) != 1) { | 129 | file.WriteObject(static_cast<u32>(code_b.size())) != 1) { |
| 107 | return false; | 130 | return false; |
| 108 | } | 131 | } |
| 109 | 132 | if (file.WriteArray(code.data(), code.size()) != code.size()) { | |
| 110 | if (file.WriteArray(code.data(), code.size()) != code.size()) | ||
| 111 | return false; | 133 | return false; |
| 112 | 134 | } | |
| 113 | if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) { | 135 | if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) { |
| 114 | return false; | 136 | return false; |
| 115 | } | 137 | } |
| 116 | return true; | 138 | |
| 139 | if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 || | ||
| 140 | file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) != 1 || | ||
| 141 | file.WriteObject(texture_handler_size.value_or(0)) != 1 || | ||
| 142 | file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || | ||
| 143 | file.WriteObject(static_cast<u32>(keys.size())) != 1 || | ||
| 144 | file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || | ||
| 145 | file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { | ||
| 146 | return false; | ||
| 147 | } | ||
| 148 | |||
| 149 | std::vector<ConstBufferKey> flat_keys; | ||
| 150 | flat_keys.reserve(keys.size()); | ||
| 151 | for (const auto& [address, value] : keys) { | ||
| 152 | flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); | ||
| 153 | } | ||
| 154 | |||
| 155 | std::vector<BoundSamplerKey> flat_bound_samplers; | ||
| 156 | flat_bound_samplers.reserve(bound_samplers.size()); | ||
| 157 | for (const auto& [address, sampler] : bound_samplers) { | ||
| 158 | flat_bound_samplers.push_back(BoundSamplerKey{address, sampler}); | ||
| 159 | } | ||
| 160 | |||
| 161 | std::vector<BindlessSamplerKey> flat_bindless_samplers; | ||
| 162 | flat_bindless_samplers.reserve(bindless_samplers.size()); | ||
| 163 | for (const auto& [address, sampler] : bindless_samplers) { | ||
| 164 | flat_bindless_samplers.push_back( | ||
| 165 | BindlessSamplerKey{address.first, address.second, sampler}); | ||
| 166 | } | ||
| 167 | |||
| 168 | return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() && | ||
| 169 | file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) == | ||
| 170 | flat_bound_samplers.size() && | ||
| 171 | file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) == | ||
| 172 | flat_bindless_samplers.size(); | ||
| 117 | } | 173 | } |
| 118 | 174 | ||
| 119 | ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} | 175 | ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} |
| 120 | 176 | ||
| 121 | ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; | 177 | ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; |
| 122 | 178 | ||
| 123 | std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> | 179 | std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() { |
| 124 | ShaderDiskCacheOpenGL::LoadTransferable() { | ||
| 125 | // Skip games without title id | 180 | // Skip games without title id |
| 126 | const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0; | 181 | const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0; |
| 127 | if (!Settings::values.use_disk_shader_cache || !has_title_id) { | 182 | if (!Settings::values.use_disk_shader_cache || !has_title_id) { |
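Since the Raw/Usage split is gone, each transferable record is now written by ShaderDiskCacheEntry::Save() as one flat sequence of fields. The struct below is only a reading aid reconstructed from the Write calls above, not a type the code declares, and a real struct would not match the file byte-for-byte because the fields are written one by one without padding; the types of bound_buffer and the *_info blobs are assumptions.

    #include "common/common_types.h" // u8/u32/u64
    // Reading aid for the new transferable entry layout.
    struct TransferableEntryLayout {
        u32 type;                      // Tegra::Engines::ShaderType
        u32 code_size;                 // number of u64 words in code
        u32 code_size_b;               // number of u64 words in code_b (VertexA only)
        // u64 code[code_size];        // followed by code_b[code_size_b] when present
        u64 unique_identifier;
        u32 bound_buffer;              // assumed u32
        u8 has_texture_handler_size;   // 1 if texture_handler_size holds a value
        u32 texture_handler_size;      // value_or(0)
        // graphics_info and compute_info are written here as raw objects
        u32 num_keys;
        u32 num_bound_samplers;
        u32 num_bindless_samplers;
        // ConstBufferKey keys[num_keys];
        // BoundSamplerKey bound_samplers[num_bound_samplers];
        // BindlessSamplerKey bindless_samplers[num_bindless_samplers];
    };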
| @@ -130,17 +185,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||
| 130 | 185 | ||
| 131 | FileUtil::IOFile file(GetTransferablePath(), "rb"); | 186 | FileUtil::IOFile file(GetTransferablePath(), "rb"); |
| 132 | if (!file.IsOpen()) { | 187 | if (!file.IsOpen()) { |
| 133 | LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}", | 188 | LOG_INFO(Render_OpenGL, "No transferable shader cache found"); |
| 134 | GetTitleID()); | ||
| 135 | is_usable = true; | 189 | is_usable = true; |
| 136 | return {}; | 190 | return {}; |
| 137 | } | 191 | } |
| 138 | 192 | ||
| 139 | u32 version{}; | 193 | u32 version{}; |
| 140 | if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { | 194 | if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { |
| 141 | LOG_ERROR(Render_OpenGL, | 195 | LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); |
| 142 | "Failed to get transferable cache version for title id={}, skipping", | ||
| 143 | GetTitleID()); | ||
| 144 | return {}; | 196 | return {}; |
| 145 | } | 197 | } |
| 146 | 198 | ||
| @@ -158,105 +210,42 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||
| 158 | } | 210 | } |
| 159 | 211 | ||
| 160 | // Version is valid, load the shaders | 212 | // Version is valid, load the shaders |
| 161 | constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping"; | 213 | std::vector<ShaderDiskCacheEntry> entries; |
| 162 | std::vector<ShaderDiskCacheRaw> raws; | ||
| 163 | std::vector<ShaderDiskCacheUsage> usages; | ||
| 164 | while (file.Tell() < file.GetSize()) { | 214 | while (file.Tell() < file.GetSize()) { |
| 165 | TransferableEntryKind kind{}; | 215 | ShaderDiskCacheEntry& entry = entries.emplace_back(); |
| 166 | if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { | 216 | if (!entry.Load(file)) { |
| 167 | LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping"); | 217 | LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); |
| 168 | return {}; | ||
| 169 | } | ||
| 170 | |||
| 171 | switch (kind) { | ||
| 172 | case TransferableEntryKind::Raw: { | ||
| 173 | ShaderDiskCacheRaw entry; | ||
| 174 | if (!entry.Load(file)) { | ||
| 175 | LOG_ERROR(Render_OpenGL, error_loading); | ||
| 176 | return {}; | ||
| 177 | } | ||
| 178 | transferable.insert({entry.GetUniqueIdentifier(), {}}); | ||
| 179 | raws.push_back(std::move(entry)); | ||
| 180 | break; | ||
| 181 | } | ||
| 182 | case TransferableEntryKind::Usage: { | ||
| 183 | ShaderDiskCacheUsage usage; | ||
| 184 | |||
| 185 | u32 num_keys{}; | ||
| 186 | u32 num_bound_samplers{}; | ||
| 187 | u32 num_bindless_samplers{}; | ||
| 188 | if (file.ReadArray(&usage.unique_identifier, 1) != 1 || | ||
| 189 | file.ReadArray(&usage.variant, 1) != 1 || | ||
| 190 | file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || | ||
| 191 | file.ReadArray(&num_bound_samplers, 1) != 1 || | ||
| 192 | file.ReadArray(&num_bindless_samplers, 1) != 1) { | ||
| 193 | LOG_ERROR(Render_OpenGL, error_loading); | ||
| 194 | return {}; | ||
| 195 | } | ||
| 196 | |||
| 197 | std::vector<ConstBufferKey> keys(num_keys); | ||
| 198 | std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers); | ||
| 199 | std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers); | ||
| 200 | if (file.ReadArray(keys.data(), keys.size()) != keys.size() || | ||
| 201 | file.ReadArray(bound_samplers.data(), bound_samplers.size()) != | ||
| 202 | bound_samplers.size() || | ||
| 203 | file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) != | ||
| 204 | bindless_samplers.size()) { | ||
| 205 | LOG_ERROR(Render_OpenGL, error_loading); | ||
| 206 | return {}; | ||
| 207 | } | ||
| 208 | for (const auto& key : keys) { | ||
| 209 | usage.keys.insert({{key.cbuf, key.offset}, key.value}); | ||
| 210 | } | ||
| 211 | for (const auto& key : bound_samplers) { | ||
| 212 | usage.bound_samplers.emplace(key.offset, key.sampler); | ||
| 213 | } | ||
| 214 | for (const auto& key : bindless_samplers) { | ||
| 215 | usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); | ||
| 216 | } | ||
| 217 | |||
| 218 | usages.push_back(std::move(usage)); | ||
| 219 | break; | ||
| 220 | } | ||
| 221 | default: | ||
| 222 | LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping", | ||
| 223 | static_cast<u32>(kind)); | ||
| 224 | return {}; | 218 | return {}; |
| 225 | } | 219 | } |
| 226 | } | 220 | } |
| 227 | 221 | ||
| 228 | is_usable = true; | 222 | is_usable = true; |
| 229 | return {{std::move(raws), std::move(usages)}}; | 223 | return {std::move(entries)}; |
| 230 | } | 224 | } |
| 231 | 225 | ||
| 232 | std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> | 226 | std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() { |
| 233 | ShaderDiskCacheOpenGL::LoadPrecompiled() { | ||
| 234 | if (!is_usable) { | 227 | if (!is_usable) { |
| 235 | return {}; | 228 | return {}; |
| 236 | } | 229 | } |
| 237 | 230 | ||
| 238 | std::string path = GetPrecompiledPath(); | 231 | FileUtil::IOFile file(GetPrecompiledPath(), "rb"); |
| 239 | FileUtil::IOFile file(path, "rb"); | ||
| 240 | if (!file.IsOpen()) { | 232 | if (!file.IsOpen()) { |
| 241 | LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", | 233 | LOG_INFO(Render_OpenGL, "No precompiled shader cache found"); |
| 242 | GetTitleID()); | ||
| 243 | return {}; | 234 | return {}; |
| 244 | } | 235 | } |
| 245 | 236 | ||
| 246 | const auto result = LoadPrecompiledFile(file); | 237 | if (const auto result = LoadPrecompiledFile(file)) { |
| 247 | if (!result) { | 238 | return *result; |
| 248 | LOG_INFO(Render_OpenGL, | ||
| 249 | "Failed to load precompiled cache for game with title id={}, removing", | ||
| 250 | GetTitleID()); | ||
| 251 | file.Close(); | ||
| 252 | InvalidatePrecompiled(); | ||
| 253 | return {}; | ||
| 254 | } | 239 | } |
| 255 | return *result; | 240 | |
| 241 | LOG_INFO(Render_OpenGL, "Failed to load precompiled cache"); | ||
| 242 | file.Close(); | ||
| 243 | InvalidatePrecompiled(); | ||
| 244 | return {}; | ||
| 256 | } | 245 | } |
| 257 | 246 | ||
| 258 | std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> | 247 | std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile( |
| 259 | ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | 248 | FileUtil::IOFile& file) { |
| 260 | // Read compressed file from disk and decompress to virtual precompiled cache file | 249 | // Read compressed file from disk and decompress to virtual precompiled cache file |
| 261 | std::vector<u8> compressed(file.GetSize()); | 250 | std::vector<u8> compressed(file.GetSize()); |
| 262 | file.ReadBytes(compressed.data(), compressed.size()); | 251 | file.ReadBytes(compressed.data(), compressed.size()); |
| @@ -275,58 +264,22 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | |||
| 275 | return {}; | 264 | return {}; |
| 276 | } | 265 | } |
| 277 | 266 | ||
| 278 | ShaderDumpsMap dumps; | 267 | std::vector<ShaderDiskCachePrecompiled> entries; |
| 279 | while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { | 268 | while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { |
| 280 | u32 num_keys{}; | 269 | u32 binary_size; |
| 281 | u32 num_bound_samplers{}; | 270 | auto& entry = entries.emplace_back(); |
| 282 | u32 num_bindless_samplers{}; | 271 | if (!LoadObjectFromPrecompiled(entry.unique_identifier) || |
| 283 | ShaderDiskCacheUsage usage; | 272 | !LoadObjectFromPrecompiled(entry.binary_format) || |
| 284 | if (!LoadObjectFromPrecompiled(usage.unique_identifier) || | 273 | !LoadObjectFromPrecompiled(binary_size)) { |
| 285 | !LoadObjectFromPrecompiled(usage.variant) || | ||
| 286 | !LoadObjectFromPrecompiled(usage.bound_buffer) || | ||
| 287 | !LoadObjectFromPrecompiled(num_keys) || | ||
| 288 | !LoadObjectFromPrecompiled(num_bound_samplers) || | ||
| 289 | !LoadObjectFromPrecompiled(num_bindless_samplers)) { | ||
| 290 | return {}; | ||
| 291 | } | ||
| 292 | std::vector<ConstBufferKey> keys(num_keys); | ||
| 293 | std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers); | ||
| 294 | std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers); | ||
| 295 | if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) || | ||
| 296 | !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) != | ||
| 297 | bound_samplers.size() || | ||
| 298 | !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) != | ||
| 299 | bindless_samplers.size()) { | ||
| 300 | return {}; | ||
| 301 | } | ||
| 302 | for (const auto& key : keys) { | ||
| 303 | usage.keys.insert({{key.cbuf, key.offset}, key.value}); | ||
| 304 | } | ||
| 305 | for (const auto& key : bound_samplers) { | ||
| 306 | usage.bound_samplers.emplace(key.offset, key.sampler); | ||
| 307 | } | ||
| 308 | for (const auto& key : bindless_samplers) { | ||
| 309 | usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); | ||
| 310 | } | ||
| 311 | |||
| 312 | ShaderDiskCacheDump dump; | ||
| 313 | if (!LoadObjectFromPrecompiled(dump.binary_format)) { | ||
| 314 | return {}; | ||
| 315 | } | ||
| 316 | |||
| 317 | u32 binary_length{}; | ||
| 318 | if (!LoadObjectFromPrecompiled(binary_length)) { | ||
| 319 | return {}; | 274 | return {}; |
| 320 | } | 275 | } |
| 321 | 276 | ||
| 322 | dump.binary.resize(binary_length); | 277 | entry.binary.resize(binary_size); |
| 323 | if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { | 278 | if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { |
| 324 | return {}; | 279 | return {}; |
| 325 | } | 280 | } |
| 326 | |||
| 327 | dumps.emplace(std::move(usage), dump); | ||
| 328 | } | 281 | } |
| 329 | return dumps; | 282 | return entries; |
| 330 | } | 283 | } |
| 331 | 284 | ||
| 332 | void ShaderDiskCacheOpenGL::InvalidateTransferable() { | 285 | void ShaderDiskCacheOpenGL::InvalidateTransferable() { |
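
Each precompiled record read above is just a unique identifier, the driver's binary format, a size, and the raw program binary. How the renderer consumes these entries is outside this diff; a minimal sketch of the assumed path, using only standard OpenGL calls and a hypothetical helper name:

    // Sketch: turn a loaded precompiled entry back into a GL program object, discarding the
    // binary if the driver rejects it (e.g. after a driver or GPU change).
    GLuint BuildFromPrecompiled(const ShaderDiskCachePrecompiled& entry) {
        const GLuint program = glCreateProgram();
        glProgramBinary(program, entry.binary_format, entry.binary.data(),
                        static_cast<GLsizei>(entry.binary.size()));
        GLint link_status = GL_FALSE;
        glGetProgramiv(program, GL_LINK_STATUS, &link_status);
        if (link_status != GL_TRUE) {
            glDeleteProgram(program);
            return 0; // Caller falls back to rebuilding the shader from the transferable entry
        }
        return program;
    }
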
| @@ -346,13 +299,13 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { | |||
| 346 | } | 299 | } |
| 347 | } | 300 | } |
| 348 | 301 | ||
| 349 | void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { | 302 | void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { |
| 350 | if (!is_usable) { | 303 | if (!is_usable) { |
| 351 | return; | 304 | return; |
| 352 | } | 305 | } |
| 353 | 306 | ||
| 354 | const u64 id = entry.GetUniqueIdentifier(); | 307 | const u64 id = entry.unique_identifier; |
| 355 | if (transferable.find(id) != transferable.end()) { | 308 | if (stored_transferable.find(id) != stored_transferable.end()) { |
| 356 | // The shader already exists | 309 | // The shader already exists |
| 357 | return; | 310 | return; |
| 358 | } | 311 | } |
| @@ -361,71 +314,17 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { | |||
| 361 | if (!file.IsOpen()) { | 314 | if (!file.IsOpen()) { |
| 362 | return; | 315 | return; |
| 363 | } | 316 | } |
| 364 | if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) { | 317 | if (!entry.Save(file)) { |
| 365 | LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); | 318 | LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); |
| 366 | file.Close(); | 319 | file.Close(); |
| 367 | InvalidateTransferable(); | 320 | InvalidateTransferable(); |
| 368 | return; | 321 | return; |
| 369 | } | 322 | } |
| 370 | transferable.insert({id, {}}); | ||
| 371 | } | ||
| 372 | 323 | ||
| 373 | void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { | 324 | stored_transferable.insert(id); |
| 374 | if (!is_usable) { | ||
| 375 | return; | ||
| 376 | } | ||
| 377 | |||
| 378 | const auto it = transferable.find(usage.unique_identifier); | ||
| 379 | ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously"); | ||
| 380 | |||
| 381 | auto& usages{it->second}; | ||
| 382 | if (usages.find(usage) != usages.end()) { | ||
| 383 | // Skip this variant since the shader is already stored. | ||
| 384 | return; | ||
| 385 | } | ||
| 386 | usages.insert(usage); | ||
| 387 | |||
| 388 | FileUtil::IOFile file = AppendTransferableFile(); | ||
| 389 | if (!file.IsOpen()) | ||
| 390 | return; | ||
| 391 | const auto Close = [&] { | ||
| 392 | LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing"); | ||
| 393 | file.Close(); | ||
| 394 | InvalidateTransferable(); | ||
| 395 | }; | ||
| 396 | |||
| 397 | if (file.WriteObject(TransferableEntryKind::Usage) != 1 || | ||
| 398 | file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || | ||
| 399 | file.WriteObject(usage.bound_buffer) != 1 || | ||
| 400 | file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 || | ||
| 401 | file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 || | ||
| 402 | file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) { | ||
| 403 | Close(); | ||
| 404 | return; | ||
| 405 | } | ||
| 406 | for (const auto& [pair, value] : usage.keys) { | ||
| 407 | const auto [cbuf, offset] = pair; | ||
| 408 | if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) { | ||
| 409 | Close(); | ||
| 410 | return; | ||
| 411 | } | ||
| 412 | } | ||
| 413 | for (const auto& [offset, sampler] : usage.bound_samplers) { | ||
| 414 | if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) { | ||
| 415 | Close(); | ||
| 416 | return; | ||
| 417 | } | ||
| 418 | } | ||
| 419 | for (const auto& [pair, sampler] : usage.bindless_samplers) { | ||
| 420 | const auto [cbuf, offset] = pair; | ||
| 421 | if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { | ||
| 422 | Close(); | ||
| 423 | return; | ||
| 424 | } | ||
| 425 | } | ||
| 426 | } | 325 | } |
| 427 | 326 | ||
| 428 | void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) { | 327 | void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) { |
| 429 | if (!is_usable) { | 328 | if (!is_usable) { |
| 430 | return; | 329 | return; |
| 431 | } | 330 | } |
| @@ -437,51 +336,19 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p | |||
| 437 | SavePrecompiledHeaderToVirtualPrecompiledCache(); | 336 | SavePrecompiledHeaderToVirtualPrecompiledCache(); |
| 438 | } | 337 | } |
| 439 | 338 | ||
| 440 | GLint binary_length{}; | 339 | GLint binary_length; |
| 441 | glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); | 340 | glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); |
| 442 | 341 | ||
| 443 | GLenum binary_format{}; | 342 | GLenum binary_format; |
| 444 | std::vector<u8> binary(binary_length); | 343 | std::vector<u8> binary(binary_length); |
| 445 | glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); | 344 | glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); |
| 446 | 345 | ||
| 447 | const auto Close = [&] { | 346 | if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) || |
| 347 | !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) || | ||
| 348 | !SaveArrayToPrecompiled(binary.data(), binary.size())) { | ||
| 448 | LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", | 349 | LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", |
| 449 | usage.unique_identifier); | 350 | unique_identifier); |
| 450 | InvalidatePrecompiled(); | 351 | InvalidatePrecompiled(); |
| 451 | }; | ||
| 452 | |||
| 453 | if (!SaveObjectToPrecompiled(usage.unique_identifier) || | ||
| 454 | !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) || | ||
| 455 | !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) || | ||
| 456 | !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) || | ||
| 457 | !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) { | ||
| 458 | Close(); | ||
| 459 | return; | ||
| 460 | } | ||
| 461 | for (const auto& [pair, value] : usage.keys) { | ||
| 462 | const auto [cbuf, offset] = pair; | ||
| 463 | if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) { | ||
| 464 | Close(); | ||
| 465 | return; | ||
| 466 | } | ||
| 467 | } | ||
| 468 | for (const auto& [offset, sampler] : usage.bound_samplers) { | ||
| 469 | if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) { | ||
| 470 | Close(); | ||
| 471 | return; | ||
| 472 | } | ||
| 473 | } | ||
| 474 | for (const auto& [pair, sampler] : usage.bindless_samplers) { | ||
| 475 | const auto [cbuf, offset] = pair; | ||
| 476 | if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { | ||
| 477 | Close(); | ||
| 478 | return; | ||
| 479 | } | ||
| 480 | } | ||
| 481 | if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) || | ||
| 482 | !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) || | ||
| 483 | !SaveArrayToPrecompiled(binary.data(), binary.size())) { | ||
| 484 | Close(); | ||
| 485 | } | 352 | } |
| 486 | } | 353 | } |
| 487 | 354 | ||
| @@ -534,7 +401,6 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { | |||
| 534 | if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) { | 401 | if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) { |
| 535 | LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", | 402 | LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", |
| 536 | precompiled_path); | 403 | precompiled_path); |
| 537 | return; | ||
| 538 | } | 404 | } |
| 539 | } | 405 | } |
| 540 | 406 | ||
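
Taken together, the save side of the rewritten cache is now two calls plus a flush. A rough usage sketch; the real call sites live in the GL shader cache, outside this excerpt, and `entry`/`program_handle` are placeholders:

    // Sketch of the assumed save flow with the new API.
    ShaderDiskCacheOpenGL disk_cache{system};
    disk_cache.SaveEntry(entry);                  // Raw guest shader -> transferable file
    disk_cache.SavePrecompiled(entry.unique_identifier, program_handle); // GL binary -> virtual file
    disk_cache.SaveVirtualPrecompiledFile();      // Compress the virtual file and write it to disk
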
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index ef2371f6d..d5be52e40 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -19,8 +19,7 @@ | |||
| 19 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 20 | #include "core/file_sys/vfs_vector.h" | 20 | #include "core/file_sys/vfs_vector.h" |
| 21 | #include "video_core/engines/shader_type.h" | 21 | #include "video_core/engines/shader_type.h" |
| 22 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 22 | #include "video_core/shader/registry.h" |
| 23 | #include "video_core/shader/const_buffer_locker.h" | ||
| 24 | 23 | ||
| 25 | namespace Core { | 24 | namespace Core { |
| 26 | class System; | 25 | class System; |
| @@ -32,139 +31,39 @@ class IOFile; | |||
| 32 | 31 | ||
| 33 | namespace OpenGL { | 32 | namespace OpenGL { |
| 34 | 33 | ||
| 35 | struct ShaderDiskCacheUsage; | ||
| 36 | struct ShaderDiskCacheDump; | ||
| 37 | |||
| 38 | using ProgramCode = std::vector<u64>; | 34 | using ProgramCode = std::vector<u64>; |
| 39 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; | ||
| 40 | |||
| 41 | /// Describes the different variants a program can be compiled with. | ||
| 42 | struct ProgramVariant final { | ||
| 43 | ProgramVariant() = default; | ||
| 44 | |||
| 45 | /// Graphics constructor. | ||
| 46 | explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept | ||
| 47 | : primitive_mode{primitive_mode} {} | ||
| 48 | |||
| 49 | /// Compute constructor. | ||
| 50 | explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size, | ||
| 51 | u32 local_memory_size) noexcept | ||
| 52 | : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)}, | ||
| 53 | shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {} | ||
| 54 | |||
| 55 | // Graphics specific parameters. | ||
| 56 | GLenum primitive_mode{}; | ||
| 57 | |||
| 58 | // Compute specific parameters. | ||
| 59 | u32 block_x{}; | ||
| 60 | u16 block_y{}; | ||
| 61 | u16 block_z{}; | ||
| 62 | u32 shared_memory_size{}; | ||
| 63 | u32 local_memory_size{}; | ||
| 64 | |||
| 65 | bool operator==(const ProgramVariant& rhs) const noexcept { | ||
| 66 | return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size, | ||
| 67 | local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y, | ||
| 68 | rhs.block_z, rhs.shared_memory_size, | ||
| 69 | rhs.local_memory_size); | ||
| 70 | } | ||
| 71 | |||
| 72 | bool operator!=(const ProgramVariant& rhs) const noexcept { | ||
| 73 | return !operator==(rhs); | ||
| 74 | } | ||
| 75 | }; | ||
| 76 | static_assert(std::is_trivially_copyable_v<ProgramVariant>); | ||
| 77 | |||
| 78 | /// Describes how a shader is used. | ||
| 79 | struct ShaderDiskCacheUsage { | ||
| 80 | u64 unique_identifier{}; | ||
| 81 | ProgramVariant variant; | ||
| 82 | u32 bound_buffer{}; | ||
| 83 | VideoCommon::Shader::KeyMap keys; | ||
| 84 | VideoCommon::Shader::BoundSamplerMap bound_samplers; | ||
| 85 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | ||
| 86 | |||
| 87 | bool operator==(const ShaderDiskCacheUsage& rhs) const { | ||
| 88 | return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) == | ||
| 89 | std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers, | ||
| 90 | rhs.bindless_samplers); | ||
| 91 | } | ||
| 92 | |||
| 93 | bool operator!=(const ShaderDiskCacheUsage& rhs) const { | ||
| 94 | return !operator==(rhs); | ||
| 95 | } | ||
| 96 | }; | ||
| 97 | |||
| 98 | } // namespace OpenGL | ||
| 99 | |||
| 100 | namespace std { | ||
| 101 | |||
| 102 | template <> | ||
| 103 | struct hash<OpenGL::ProgramVariant> { | ||
| 104 | std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { | ||
| 105 | return (static_cast<std::size_t>(variant.primitive_mode) << 6) ^ | ||
| 106 | static_cast<std::size_t>(variant.block_x) ^ | ||
| 107 | (static_cast<std::size_t>(variant.block_y) << 32) ^ | ||
| 108 | (static_cast<std::size_t>(variant.block_z) << 48) ^ | ||
| 109 | (static_cast<std::size_t>(variant.shared_memory_size) << 16) ^ | ||
| 110 | (static_cast<std::size_t>(variant.local_memory_size) << 36); | ||
| 111 | } | ||
| 112 | }; | ||
| 113 | |||
| 114 | template <> | ||
| 115 | struct hash<OpenGL::ShaderDiskCacheUsage> { | ||
| 116 | std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { | ||
| 117 | return static_cast<std::size_t>(usage.unique_identifier) ^ | ||
| 118 | std::hash<OpenGL::ProgramVariant>{}(usage.variant); | ||
| 119 | } | ||
| 120 | }; | ||
| 121 | |||
| 122 | } // namespace std | ||
| 123 | |||
| 124 | namespace OpenGL { | ||
| 125 | 35 | ||
| 126 | /// Describes a shader how it's used by the guest GPU | 36 | /// Describes a shader and how it's used by the guest GPU |
| 127 | class ShaderDiskCacheRaw { | 37 | struct ShaderDiskCacheEntry { |
| 128 | public: | 38 | ShaderDiskCacheEntry(); |
| 129 | explicit ShaderDiskCacheRaw(u64 unique_identifier, Tegra::Engines::ShaderType type, | 39 | ~ShaderDiskCacheEntry(); |
| 130 | ProgramCode code, ProgramCode code_b = {}); | ||
| 131 | ShaderDiskCacheRaw(); | ||
| 132 | ~ShaderDiskCacheRaw(); | ||
| 133 | 40 | ||
| 134 | bool Load(FileUtil::IOFile& file); | 41 | bool Load(FileUtil::IOFile& file); |
| 135 | 42 | ||
| 136 | bool Save(FileUtil::IOFile& file) const; | 43 | bool Save(FileUtil::IOFile& file) const; |
| 137 | 44 | ||
| 138 | u64 GetUniqueIdentifier() const { | ||
| 139 | return unique_identifier; | ||
| 140 | } | ||
| 141 | |||
| 142 | bool HasProgramA() const { | 45 | bool HasProgramA() const { |
| 143 | return !code.empty() && !code_b.empty(); | 46 | return !code.empty() && !code_b.empty(); |
| 144 | } | 47 | } |
| 145 | 48 | ||
| 146 | Tegra::Engines::ShaderType GetType() const { | ||
| 147 | return type; | ||
| 148 | } | ||
| 149 | |||
| 150 | const ProgramCode& GetCode() const { | ||
| 151 | return code; | ||
| 152 | } | ||
| 153 | |||
| 154 | const ProgramCode& GetCodeB() const { | ||
| 155 | return code_b; | ||
| 156 | } | ||
| 157 | |||
| 158 | private: | ||
| 159 | u64 unique_identifier{}; | ||
| 160 | Tegra::Engines::ShaderType type{}; | 49 | Tegra::Engines::ShaderType type{}; |
| 161 | ProgramCode code; | 50 | ProgramCode code; |
| 162 | ProgramCode code_b; | 51 | ProgramCode code_b; |
| 52 | |||
| 53 | u64 unique_identifier = 0; | ||
| 54 | std::optional<u32> texture_handler_size; | ||
| 55 | u32 bound_buffer = 0; | ||
| 56 | VideoCommon::Shader::GraphicsInfo graphics_info; | ||
| 57 | VideoCommon::Shader::ComputeInfo compute_info; | ||
| 58 | VideoCommon::Shader::KeyMap keys; | ||
| 59 | VideoCommon::Shader::BoundSamplerMap bound_samplers; | ||
| 60 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | ||
| 163 | }; | 61 | }; |
| 164 | 62 | ||
| 165 | /// Contains an OpenGL dumped binary program | 63 | /// Contains an OpenGL dumped binary program |
| 166 | struct ShaderDiskCacheDump { | 64 | struct ShaderDiskCachePrecompiled { |
| 167 | GLenum binary_format{}; | 65 | u64 unique_identifier = 0; |
| 66 | GLenum binary_format = 0; | ||
| 168 | std::vector<u8> binary; | 67 | std::vector<u8> binary; |
| 169 | }; | 68 | }; |
| 170 | 69 | ||
| @@ -174,11 +73,10 @@ public: | |||
| 174 | ~ShaderDiskCacheOpenGL(); | 73 | ~ShaderDiskCacheOpenGL(); |
| 175 | 74 | ||
| 176 | /// Loads transferable cache. If file has a old version or on failure, it deletes the file. | 75 | /// Loads transferable cache. If file has a old version or on failure, it deletes the file. |
| 177 | std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> | 76 | std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable(); |
| 178 | LoadTransferable(); | ||
| 179 | 77 | ||
| 180 | /// Loads current game's precompiled cache. Invalidates on failure. | 78 | /// Loads current game's precompiled cache. Invalidates on failure. |
| 181 | std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled(); | 79 | std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled(); |
| 182 | 80 | ||
| 183 | /// Removes the transferable (and precompiled) cache file. | 81 | /// Removes the transferable (and precompiled) cache file. |
| 184 | void InvalidateTransferable(); | 82 | void InvalidateTransferable(); |
| @@ -187,21 +85,18 @@ public: | |||
| 187 | void InvalidatePrecompiled(); | 85 | void InvalidatePrecompiled(); |
| 188 | 86 | ||
| 189 | /// Saves a raw dump to the transferable file. Checks for collisions. | 87 | /// Saves a raw dump to the transferable file. Checks for collisions. |
| 190 | void SaveRaw(const ShaderDiskCacheRaw& entry); | 88 | void SaveEntry(const ShaderDiskCacheEntry& entry); |
| 191 | |||
| 192 | /// Saves shader usage to the transferable file. Does not check for collisions. | ||
| 193 | void SaveUsage(const ShaderDiskCacheUsage& usage); | ||
| 194 | 89 | ||
| 195 | /// Saves a dump entry to the precompiled file. Does not check for collisions. | 90 | /// Saves a dump entry to the precompiled file. Does not check for collisions. |
| 196 | void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); | 91 | void SavePrecompiled(u64 unique_identifier, GLuint program); |
| 197 | 92 | ||
| 198 | /// Serializes virtual precompiled shader cache file to real file | 93 | /// Serializes virtual precompiled shader cache file to real file |
| 199 | void SaveVirtualPrecompiledFile(); | 94 | void SaveVirtualPrecompiledFile(); |
| 200 | 95 | ||
| 201 | private: | 96 | private: |
| 202 | /// Loads the transferable cache. Returns empty on failure. | 97 | /// Loads the transferable cache. Returns empty on failure. |
| 203 | std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> | 98 | std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile( |
| 204 | LoadPrecompiledFile(FileUtil::IOFile& file); | 99 | FileUtil::IOFile& file); |
| 205 | 100 | ||
| 206 | /// Opens current game's transferable file and write it's header if it doesn't exist | 101 | /// Opens current game's transferable file and write it's header if it doesn't exist |
| 207 | FileUtil::IOFile AppendTransferableFile() const; | 102 | FileUtil::IOFile AppendTransferableFile() const; |
| @@ -270,7 +165,7 @@ private: | |||
| 270 | std::size_t precompiled_cache_virtual_file_offset = 0; | 165 | std::size_t precompiled_cache_virtual_file_offset = 0; |
| 271 | 166 | ||
| 272 | // Stored transferable shaders | 167 | // Stored transferable shaders |
| 273 | std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable; | 168 | std::unordered_set<u64> stored_transferable; |
| 274 | 169 | ||
| 275 | // The cache has been loaded at boot | 170 | // The cache has been loaded at boot |
| 276 | bool is_usable{}; | 171 | bool is_usable{}; |
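
With the usage/variant machinery gone, the public interface boils down to the four calls above. A sketch of the assumed boot-time flow; matching precompiled binaries to transferable entries by unique_identifier is an assumption, and `system` is taken to be in scope:

    // Sketch: load everything once at boot and pair binaries with their source entries.
    // (std::find_if requires <algorithm>.)
    ShaderDiskCacheOpenGL disk_cache{system};
    if (const auto entries = disk_cache.LoadTransferable()) {
        const std::vector<ShaderDiskCachePrecompiled> precompiled = disk_cache.LoadPrecompiled();
        for (const ShaderDiskCacheEntry& entry : *entries) {
            const auto it =
                std::find_if(precompiled.begin(), precompiled.end(), [&entry](const auto& dump) {
                    return dump.unique_identifier == entry.unique_identifier;
                });
            const bool has_binary = it != precompiled.end();
            // Build the program from it->binary when has_binary, otherwise decompile entry.code.
        }
    }
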
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp deleted file mode 100644 index 34946fb47..000000000 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ /dev/null | |||
| @@ -1,109 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <string> | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 10 | #include "video_core/engines/shader_type.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||
| 14 | #include "video_core/shader/shader_ir.h" | ||
| 15 | |||
| 16 | namespace OpenGL::GLShader { | ||
| 17 | |||
| 18 | using Tegra::Engines::Maxwell3D; | ||
| 19 | using Tegra::Engines::ShaderType; | ||
| 20 | using VideoCommon::Shader::CompileDepth; | ||
| 21 | using VideoCommon::Shader::CompilerSettings; | ||
| 22 | using VideoCommon::Shader::ProgramCode; | ||
| 23 | using VideoCommon::Shader::ShaderIR; | ||
| 24 | |||
| 25 | std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { | ||
| 26 | std::string out = GetCommonDeclarations(); | ||
| 27 | out += fmt::format(R"( | ||
| 28 | layout (std140, binding = {}) uniform vs_config {{ | ||
| 29 | float y_direction; | ||
| 30 | }}; | ||
| 31 | |||
| 32 | )", | ||
| 33 | EmulationUniformBlockBinding); | ||
| 34 | out += Decompile(device, ir, ShaderType::Vertex, "vertex"); | ||
| 35 | if (ir_b) { | ||
| 36 | out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b"); | ||
| 37 | } | ||
| 38 | |||
| 39 | out += R"( | ||
| 40 | void main() { | ||
| 41 | gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f); | ||
| 42 | execute_vertex(); | ||
| 43 | )"; | ||
| 44 | if (ir_b) { | ||
| 45 | out += " execute_vertex_b();"; | ||
| 46 | } | ||
| 47 | out += "}\n"; | ||
| 48 | return out; | ||
| 49 | } | ||
| 50 | |||
| 51 | std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { | ||
| 52 | std::string out = GetCommonDeclarations(); | ||
| 53 | out += fmt::format(R"( | ||
| 54 | layout (std140, binding = {}) uniform gs_config {{ | ||
| 55 | float y_direction; | ||
| 56 | }}; | ||
| 57 | |||
| 58 | )", | ||
| 59 | EmulationUniformBlockBinding); | ||
| 60 | out += Decompile(device, ir, ShaderType::Geometry, "geometry"); | ||
| 61 | |||
| 62 | out += R"( | ||
| 63 | void main() { | ||
| 64 | execute_geometry(); | ||
| 65 | } | ||
| 66 | )"; | ||
| 67 | return out; | ||
| 68 | } | ||
| 69 | |||
| 70 | std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { | ||
| 71 | std::string out = GetCommonDeclarations(); | ||
| 72 | out += fmt::format(R"( | ||
| 73 | layout (location = 0) out vec4 FragColor0; | ||
| 74 | layout (location = 1) out vec4 FragColor1; | ||
| 75 | layout (location = 2) out vec4 FragColor2; | ||
| 76 | layout (location = 3) out vec4 FragColor3; | ||
| 77 | layout (location = 4) out vec4 FragColor4; | ||
| 78 | layout (location = 5) out vec4 FragColor5; | ||
| 79 | layout (location = 6) out vec4 FragColor6; | ||
| 80 | layout (location = 7) out vec4 FragColor7; | ||
| 81 | |||
| 82 | layout (std140, binding = {}) uniform fs_config {{ | ||
| 83 | float y_direction; | ||
| 84 | }}; | ||
| 85 | |||
| 86 | )", | ||
| 87 | EmulationUniformBlockBinding); | ||
| 88 | out += Decompile(device, ir, ShaderType::Fragment, "fragment"); | ||
| 89 | |||
| 90 | out += R"( | ||
| 91 | void main() { | ||
| 92 | execute_fragment(); | ||
| 93 | } | ||
| 94 | )"; | ||
| 95 | return out; | ||
| 96 | } | ||
| 97 | |||
| 98 | std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) { | ||
| 99 | std::string out = GetCommonDeclarations(); | ||
| 100 | out += Decompile(device, ir, ShaderType::Compute, "compute"); | ||
| 101 | out += R"( | ||
| 102 | void main() { | ||
| 103 | execute_compute(); | ||
| 104 | } | ||
| 105 | )"; | ||
| 106 | return out; | ||
| 107 | } | ||
| 108 | |||
| 109 | } // namespace OpenGL::GLShader | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h deleted file mode 100644 index cba2be9f9..000000000 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ /dev/null | |||
| @@ -1,34 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 11 | #include "video_core/shader/shader_ir.h" | ||
| 12 | |||
| 13 | namespace OpenGL { | ||
| 14 | class Device; | ||
| 15 | } | ||
| 16 | |||
| 17 | namespace OpenGL::GLShader { | ||
| 18 | |||
| 19 | using VideoCommon::Shader::ProgramCode; | ||
| 20 | using VideoCommon::Shader::ShaderIR; | ||
| 21 | |||
| 22 | /// Generates the GLSL vertex shader program source code for the given VS program | ||
| 23 | std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b); | ||
| 24 | |||
| 25 | /// Generates the GLSL geometry shader program source code for the given GS program | ||
| 26 | std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir); | ||
| 27 | |||
| 28 | /// Generates the GLSL fragment shader program source code for the given FS program | ||
| 29 | std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir); | ||
| 30 | |||
| 31 | /// Generates the GLSL compute shader program source code for the given CS program | ||
| 32 | std::string GenerateComputeShader(const Device& device, const ShaderIR& ir); | ||
| 33 | |||
| 34 | } // namespace OpenGL::GLShader | ||
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index 1e43c9ec0..255ac3147 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp | |||
| @@ -94,6 +94,15 @@ void SetupDirtyShaders(Tables& tables) { | |||
| 94 | Shaders); | 94 | Shaders); |
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | void SetupDirtyPolygonModes(Tables& tables) { | ||
| 98 | tables[0][OFF(polygon_mode_front)] = PolygonModeFront; | ||
| 99 | tables[0][OFF(polygon_mode_back)] = PolygonModeBack; | ||
| 100 | |||
| 101 | tables[1][OFF(polygon_mode_front)] = PolygonModes; | ||
| 102 | tables[1][OFF(polygon_mode_back)] = PolygonModes; | ||
| 103 | tables[0][OFF(fill_rectangle)] = PolygonModes; | ||
| 104 | } | ||
| 105 | |||
| 97 | void SetupDirtyDepthTest(Tables& tables) { | 106 | void SetupDirtyDepthTest(Tables& tables) { |
| 98 | auto& table = tables[0]; | 107 | auto& table = tables[0]; |
| 99 | table[OFF(depth_test_enable)] = DepthTest; | 108 | table[OFF(depth_test_enable)] = DepthTest; |
| @@ -211,6 +220,7 @@ void StateTracker::Initialize() { | |||
| 211 | SetupDirtyVertexArrays(tables); | 220 | SetupDirtyVertexArrays(tables); |
| 212 | SetupDirtyVertexFormat(tables); | 221 | SetupDirtyVertexFormat(tables); |
| 213 | SetupDirtyShaders(tables); | 222 | SetupDirtyShaders(tables); |
| 223 | SetupDirtyPolygonModes(tables); | ||
| 214 | SetupDirtyDepthTest(tables); | 224 | SetupDirtyDepthTest(tables); |
| 215 | SetupDirtyStencilTest(tables); | 225 | SetupDirtyStencilTest(tables); |
| 216 | SetupDirtyAlphaTest(tables); | 226 | SetupDirtyAlphaTest(tables); |
| @@ -228,7 +238,6 @@ void StateTracker::Initialize() { | |||
| 228 | SetupDirtyMisc(tables); | 238 | SetupDirtyMisc(tables); |
| 229 | 239 | ||
| 230 | auto& store = dirty.on_write_stores; | 240 | auto& store = dirty.on_write_stores; |
| 231 | SetupCommonOnWriteStores(store); | ||
| 232 | store[VertexBuffers] = true; | 241 | store[VertexBuffers] = true; |
| 233 | for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { | 242 | for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { |
| 234 | store[VertexBuffer0 + i] = true; | 243 | store[VertexBuffer0 + i] = true; |
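
SetupDirtyPolygonModes follows the same pattern as the other Setup* helpers: tables[0] maps a Maxwell3D register offset to a fine-grained flag, tables[1] to a coarser aggregate flag. The consuming side is not part of this diff; the assumed mechanism, sketched with hypothetical names (NotifyRegisterWrite, dirty.tables):

    // Sketch only: on every Maxwell3D register write, each table is indexed with the register
    // offset and any non-zero entry raises the corresponding dirty flag, so writing
    // polygon_mode_front raises both PolygonModeFront and the aggregate PolygonModes.
    void NotifyRegisterWrite(std::size_t offset) {
        for (const auto& table : dirty.tables) {
            if (const u8 flag = table[offset]; flag != 0) {
                dirty.flags[flag] = true;
            }
        }
    }
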
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index e08482911..b882d75c3 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h | |||
| @@ -59,6 +59,10 @@ enum : u8 { | |||
| 59 | Shaders, | 59 | Shaders, |
| 60 | ClipDistances, | 60 | ClipDistances, |
| 61 | 61 | ||
| 62 | PolygonModes, | ||
| 63 | PolygonModeFront, | ||
| 64 | PolygonModeBack, | ||
| 65 | |||
| 62 | ColorMask, | 66 | ColorMask, |
| 63 | FrontFace, | 67 | FrontFace, |
| 64 | CullTest, | 68 | CullTest, |
| @@ -111,6 +115,13 @@ public: | |||
| 111 | flags[OpenGL::Dirty::VertexInstance0 + 1] = true; | 115 | flags[OpenGL::Dirty::VertexInstance0 + 1] = true; |
| 112 | } | 116 | } |
| 113 | 117 | ||
| 118 | void NotifyPolygonModes() { | ||
| 119 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | ||
| 120 | flags[OpenGL::Dirty::PolygonModes] = true; | ||
| 121 | flags[OpenGL::Dirty::PolygonModeFront] = true; | ||
| 122 | flags[OpenGL::Dirty::PolygonModeBack] = true; | ||
| 123 | } | ||
| 124 | |||
| 114 | void NotifyViewport0() { | 125 | void NotifyViewport0() { |
| 115 | auto& flags = system.GPU().Maxwell3D().dirty.flags; | 126 | auto& flags = system.GPU().Maxwell3D().dirty.flags; |
| 116 | flags[OpenGL::Dirty::Viewports] = true; | 127 | flags[OpenGL::Dirty::Viewports] = true; |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 2d3838a7a..f424e3000 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -53,6 +53,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format | |||
| 53 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI | 53 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI |
| 54 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F | 54 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F |
| 55 | {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U | 55 | {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U |
| 56 | {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S | ||
| 56 | {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI | 57 | {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI |
| 57 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F | 58 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F |
| 58 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI | 59 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI |
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 494e38e7a..89f0e04ef 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h | |||
| @@ -488,5 +488,18 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) { | |||
| 488 | return GL_COPY; | 488 | return GL_COPY; |
| 489 | } | 489 | } |
| 490 | 490 | ||
| 491 | inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) { | ||
| 492 | switch (polygon_mode) { | ||
| 493 | case Maxwell::PolygonMode::Point: | ||
| 494 | return GL_POINT; | ||
| 495 | case Maxwell::PolygonMode::Line: | ||
| 496 | return GL_LINE; | ||
| 497 | case Maxwell::PolygonMode::Fill: | ||
| 498 | return GL_FILL; | ||
| 499 | } | ||
| 500 | UNREACHABLE_MSG("Invalid polygon mode={}", static_cast<int>(polygon_mode)); | ||
| 501 | return GL_FILL; | ||
| 502 | } | ||
| 503 | |||
| 491 | } // namespace MaxwellToGL | 504 | } // namespace MaxwellToGL |
| 492 | } // namespace OpenGL | 505 | } // namespace OpenGL |
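
MaxwellToGL::PolygonMode completes the translation path for the new PolygonModes dirty flags. A sketch of an assumed call site in the rasterizer, simplified to GL_FRONT_AND_BACK (the real code may emit front and back modes separately):

    // Sketch: re-emit the polygon mode only when the state tracker marked it dirty.
    auto& flags = system.GPU().Maxwell3D().dirty.flags;
    if (flags[Dirty::PolygonModes]) {
        flags[Dirty::PolygonModes] = false;
        const auto& regs = system.GPU().Maxwell3D().regs;
        glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
    }
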
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index c05677cd9..fca5e3ec0 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -5,8 +5,11 @@ | |||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <cstddef> | 6 | #include <cstddef> |
| 7 | #include <cstdlib> | 7 | #include <cstdlib> |
| 8 | #include <cstring> | ||
| 8 | #include <memory> | 9 | #include <memory> |
| 10 | |||
| 9 | #include <glad/glad.h> | 11 | #include <glad/glad.h> |
| 12 | |||
| 10 | #include "common/assert.h" | 13 | #include "common/assert.h" |
| 11 | #include "common/logging/log.h" | 14 | #include "common/logging/log.h" |
| 12 | #include "common/microprofile.h" | 15 | #include "common/microprofile.h" |
| @@ -25,6 +28,8 @@ | |||
| 25 | 28 | ||
| 26 | namespace OpenGL { | 29 | namespace OpenGL { |
| 27 | 30 | ||
| 31 | namespace { | ||
| 32 | |||
| 28 | // If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have | 33 | // If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have |
| 29 | // to wait on available presentation frames. | 34 | // to wait on available presentation frames. |
| 30 | constexpr std::size_t SWAP_CHAIN_SIZE = 3; | 35 | constexpr std::size_t SWAP_CHAIN_SIZE = 3; |
| @@ -41,124 +46,6 @@ struct Frame { | |||
| 41 | bool is_srgb{}; /// Framebuffer is sRGB or RGB | 46 | bool is_srgb{}; /// Framebuffer is sRGB or RGB |
| 42 | }; | 47 | }; |
| 43 | 48 | ||
| 44 | /** | ||
| 45 | * For smooth Vsync rendering, we want to always present the latest frame that the core generates, | ||
| 46 | * but also make sure that rendering happens at the pace that the frontend dictates. This is a | ||
| 47 | * helper class that the renderer uses to sync frames between the render thread and the presentation | ||
| 48 | * thread | ||
| 49 | */ | ||
| 50 | class FrameMailbox { | ||
| 51 | public: | ||
| 52 | std::mutex swap_chain_lock; | ||
| 53 | std::condition_variable present_cv; | ||
| 54 | std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{}; | ||
| 55 | std::queue<Frame*> free_queue; | ||
| 56 | std::deque<Frame*> present_queue; | ||
| 57 | Frame* previous_frame{}; | ||
| 58 | |||
| 59 | FrameMailbox() { | ||
| 60 | for (auto& frame : swap_chain) { | ||
| 61 | free_queue.push(&frame); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | ~FrameMailbox() { | ||
| 66 | // lock the mutex and clear out the present and free_queues and notify any people who are | ||
| 67 | // blocked to prevent deadlock on shutdown | ||
| 68 | std::scoped_lock lock{swap_chain_lock}; | ||
| 69 | std::queue<Frame*>().swap(free_queue); | ||
| 70 | present_queue.clear(); | ||
| 71 | present_cv.notify_all(); | ||
| 72 | } | ||
| 73 | |||
| 74 | void ReloadPresentFrame(Frame* frame, u32 height, u32 width) { | ||
| 75 | frame->present.Release(); | ||
| 76 | frame->present.Create(); | ||
| 77 | GLint previous_draw_fbo{}; | ||
| 78 | glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo); | ||
| 79 | glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle); | ||
| 80 | glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, | ||
| 81 | frame->color.handle); | ||
| 82 | if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { | ||
| 83 | LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!"); | ||
| 84 | } | ||
| 85 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo); | ||
| 86 | frame->color_reloaded = false; | ||
| 87 | } | ||
| 88 | |||
| 89 | void ReloadRenderFrame(Frame* frame, u32 width, u32 height) { | ||
| 90 | // Recreate the color texture attachment | ||
| 91 | frame->color.Release(); | ||
| 92 | frame->color.Create(); | ||
| 93 | const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8; | ||
| 94 | glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height); | ||
| 95 | |||
| 96 | // Recreate the FBO for the render target | ||
| 97 | frame->render.Release(); | ||
| 98 | frame->render.Create(); | ||
| 99 | glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle); | ||
| 100 | glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, | ||
| 101 | frame->color.handle); | ||
| 102 | if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { | ||
| 103 | LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!"); | ||
| 104 | } | ||
| 105 | |||
| 106 | frame->width = width; | ||
| 107 | frame->height = height; | ||
| 108 | frame->color_reloaded = true; | ||
| 109 | } | ||
| 110 | |||
| 111 | Frame* GetRenderFrame() { | ||
| 112 | std::unique_lock lock{swap_chain_lock}; | ||
| 113 | |||
| 114 | // If there are no free frames, we will reuse the oldest render frame | ||
| 115 | if (free_queue.empty()) { | ||
| 116 | auto frame = present_queue.back(); | ||
| 117 | present_queue.pop_back(); | ||
| 118 | return frame; | ||
| 119 | } | ||
| 120 | |||
| 121 | Frame* frame = free_queue.front(); | ||
| 122 | free_queue.pop(); | ||
| 123 | return frame; | ||
| 124 | } | ||
| 125 | |||
| 126 | void ReleaseRenderFrame(Frame* frame) { | ||
| 127 | std::unique_lock lock{swap_chain_lock}; | ||
| 128 | present_queue.push_front(frame); | ||
| 129 | present_cv.notify_one(); | ||
| 130 | } | ||
| 131 | |||
| 132 | Frame* TryGetPresentFrame(int timeout_ms) { | ||
| 133 | std::unique_lock lock{swap_chain_lock}; | ||
| 134 | // wait for new entries in the present_queue | ||
| 135 | present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), | ||
| 136 | [&] { return !present_queue.empty(); }); | ||
| 137 | if (present_queue.empty()) { | ||
| 138 | // timed out waiting for a frame to draw so return the previous frame | ||
| 139 | return previous_frame; | ||
| 140 | } | ||
| 141 | |||
| 142 | // free the previous frame and add it back to the free queue | ||
| 143 | if (previous_frame) { | ||
| 144 | free_queue.push(previous_frame); | ||
| 145 | } | ||
| 146 | |||
| 147 | // the newest entries are pushed to the front of the queue | ||
| 148 | Frame* frame = present_queue.front(); | ||
| 149 | present_queue.pop_front(); | ||
| 150 | // remove all old entries from the present queue and move them back to the free_queue | ||
| 151 | for (auto f : present_queue) { | ||
| 152 | free_queue.push(f); | ||
| 153 | } | ||
| 154 | present_queue.clear(); | ||
| 155 | previous_frame = frame; | ||
| 156 | return frame; | ||
| 157 | } | ||
| 158 | }; | ||
| 159 | |||
| 160 | namespace { | ||
| 161 | |||
| 162 | constexpr char VERTEX_SHADER[] = R"( | 49 | constexpr char VERTEX_SHADER[] = R"( |
| 163 | #version 430 core | 50 | #version 430 core |
| 164 | 51 | ||
| @@ -211,6 +98,24 @@ struct ScreenRectVertex { | |||
| 211 | std::array<GLfloat, 2> tex_coord; | 98 | std::array<GLfloat, 2> tex_coord; |
| 212 | }; | 99 | }; |
| 213 | 100 | ||
| 101 | /// Returns true if any debug tool is attached | ||
| 102 | bool HasDebugTool() { | ||
| 103 | const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); | ||
| 104 | if (nsight) { | ||
| 105 | return true; | ||
| 106 | } | ||
| 107 | |||
| 108 | GLint num_extensions; | ||
| 109 | glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions); | ||
| 110 | for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) { | ||
| 111 | const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index)); | ||
| 112 | if (!std::strcmp(name, "GL_EXT_debug_tool")) { | ||
| 113 | return true; | ||
| 114 | } | ||
| 115 | } | ||
| 116 | return false; | ||
| 117 | } | ||
| 118 | |||
| 214 | /** | 119 | /** |
| 215 | * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left | 120 | * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left |
| 216 | * corner and (width, height) on the lower-bottom. | 121 | * corner and (width, height) on the lower-bottom. |
| @@ -294,6 +199,153 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit | |||
| 294 | 199 | ||
| 295 | } // Anonymous namespace | 200 | } // Anonymous namespace |
| 296 | 201 | ||
| 202 | /** | ||
| 203 | * For smooth Vsync rendering, we want to always present the latest frame that the core generates, | ||
| 204 | * but also make sure that rendering happens at the pace that the frontend dictates. This is a | ||
| 205 | * helper class that the renderer uses to sync frames between the render thread and the presentation | ||
| 206 | * thread | ||
| 207 | */ | ||
| 208 | class FrameMailbox { | ||
| 209 | public: | ||
| 210 | std::mutex swap_chain_lock; | ||
| 211 | std::condition_variable present_cv; | ||
| 212 | std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{}; | ||
| 213 | std::queue<Frame*> free_queue; | ||
| 214 | std::deque<Frame*> present_queue; | ||
| 215 | Frame* previous_frame{}; | ||
| 216 | |||
| 217 | FrameMailbox() : has_debug_tool{HasDebugTool()} { | ||
| 218 | for (auto& frame : swap_chain) { | ||
| 219 | free_queue.push(&frame); | ||
| 220 | } | ||
| 221 | } | ||
| 222 | |||
| 223 | ~FrameMailbox() { | ||
| 224 | // lock the mutex and clear out the present and free_queues and notify any people who are | ||
| 225 | // blocked to prevent deadlock on shutdown | ||
| 226 | std::scoped_lock lock{swap_chain_lock}; | ||
| 227 | std::queue<Frame*>().swap(free_queue); | ||
| 228 | present_queue.clear(); | ||
| 229 | present_cv.notify_all(); | ||
| 230 | } | ||
| 231 | |||
| 232 | void ReloadPresentFrame(Frame* frame, u32 height, u32 width) { | ||
| 233 | frame->present.Release(); | ||
| 234 | frame->present.Create(); | ||
| 235 | GLint previous_draw_fbo{}; | ||
| 236 | glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo); | ||
| 237 | glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle); | ||
| 238 | glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, | ||
| 239 | frame->color.handle); | ||
| 240 | if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { | ||
| 241 | LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!"); | ||
| 242 | } | ||
| 243 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo); | ||
| 244 | frame->color_reloaded = false; | ||
| 245 | } | ||
| 246 | |||
| 247 | void ReloadRenderFrame(Frame* frame, u32 width, u32 height) { | ||
| 248 | // Recreate the color texture attachment | ||
| 249 | frame->color.Release(); | ||
| 250 | frame->color.Create(); | ||
| 251 | const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8; | ||
| 252 | glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height); | ||
| 253 | |||
| 254 | // Recreate the FBO for the render target | ||
| 255 | frame->render.Release(); | ||
| 256 | frame->render.Create(); | ||
| 257 | glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle); | ||
| 258 | glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, | ||
| 259 | frame->color.handle); | ||
| 260 | if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { | ||
| 261 | LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!"); | ||
| 262 | } | ||
| 263 | |||
| 264 | frame->width = width; | ||
| 265 | frame->height = height; | ||
| 266 | frame->color_reloaded = true; | ||
| 267 | } | ||
| 268 | |||
| 269 | Frame* GetRenderFrame() { | ||
| 270 | std::unique_lock lock{swap_chain_lock}; | ||
| 271 | |||
| 272 | // If there are no free frames, we will reuse the oldest render frame | ||
| 273 | if (free_queue.empty()) { | ||
| 274 | auto frame = present_queue.back(); | ||
| 275 | present_queue.pop_back(); | ||
| 276 | return frame; | ||
| 277 | } | ||
| 278 | |||
| 279 | Frame* frame = free_queue.front(); | ||
| 280 | free_queue.pop(); | ||
| 281 | return frame; | ||
| 282 | } | ||
| 283 | |||
| 284 | void ReleaseRenderFrame(Frame* frame) { | ||
| 285 | std::unique_lock lock{swap_chain_lock}; | ||
| 286 | present_queue.push_front(frame); | ||
| 287 | present_cv.notify_one(); | ||
| 288 | |||
| 289 | DebugNotifyNextFrame(); | ||
| 290 | } | ||
| 291 | |||
| 292 | Frame* TryGetPresentFrame(int timeout_ms) { | ||
| 293 | DebugWaitForNextFrame(); | ||
| 294 | |||
| 295 | std::unique_lock lock{swap_chain_lock}; | ||
| 296 | // wait for new entries in the present_queue | ||
| 297 | present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), | ||
| 298 | [&] { return !present_queue.empty(); }); | ||
| 299 | if (present_queue.empty()) { | ||
| 300 | // timed out waiting for a frame to draw so return the previous frame | ||
| 301 | return previous_frame; | ||
| 302 | } | ||
| 303 | |||
| 304 | // free the previous frame and add it back to the free queue | ||
| 305 | if (previous_frame) { | ||
| 306 | free_queue.push(previous_frame); | ||
| 307 | } | ||
| 308 | |||
| 309 | // the newest entries are pushed to the front of the queue | ||
| 310 | Frame* frame = present_queue.front(); | ||
| 311 | present_queue.pop_front(); | ||
| 312 | // remove all old entries from the present queue and move them back to the free_queue | ||
| 313 | for (auto f : present_queue) { | ||
| 314 | free_queue.push(f); | ||
| 315 | } | ||
| 316 | present_queue.clear(); | ||
| 317 | previous_frame = frame; | ||
| 318 | return frame; | ||
| 319 | } | ||
| 320 | |||
| 321 | private: | ||
| 322 | std::mutex debug_synch_mutex; | ||
| 323 | std::condition_variable debug_synch_condition; | ||
| 324 | std::atomic_int frame_for_debug{}; | ||
| 325 | const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step | ||
| 326 | |||
| 327 | /// Signal that a new frame is available (called from GPU thread) | ||
| 328 | void DebugNotifyNextFrame() { | ||
| 329 | if (!has_debug_tool) { | ||
| 330 | return; | ||
| 331 | } | ||
| 332 | frame_for_debug++; | ||
| 333 | std::lock_guard lock{debug_synch_mutex}; | ||
| 334 | debug_synch_condition.notify_one(); | ||
| 335 | } | ||
| 336 | |||
| 337 | /// Wait for a new frame to be available (called from presentation thread) | ||
| 338 | void DebugWaitForNextFrame() { | ||
| 339 | if (!has_debug_tool) { | ||
| 340 | return; | ||
| 341 | } | ||
| 342 | const int last_frame = frame_for_debug; | ||
| 343 | std::unique_lock lock{debug_synch_mutex}; | ||
| 344 | debug_synch_condition.wait(lock, | ||
| 345 | [this, last_frame] { return frame_for_debug > last_frame; }); | ||
| 346 | } | ||
| 347 | }; | ||
| 348 | |||
| 297 | RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) | 349 | RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) |
| 298 | : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system}, | 350 | : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system}, |
| 299 | frame_mailbox{std::make_unique<FrameMailbox>()} {} | 351 | frame_mailbox{std::make_unique<FrameMailbox>()} {} |
| @@ -576,6 +628,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 576 | 628 | ||
| 577 | // TODO: Signal state tracker about these changes | 629 | // TODO: Signal state tracker about these changes |
| 578 | state_tracker.NotifyScreenDrawVertexArray(); | 630 | state_tracker.NotifyScreenDrawVertexArray(); |
| 631 | state_tracker.NotifyPolygonModes(); | ||
| 579 | state_tracker.NotifyViewport0(); | 632 | state_tracker.NotifyViewport0(); |
| 580 | state_tracker.NotifyScissor0(); | 633 | state_tracker.NotifyScissor0(); |
| 581 | state_tracker.NotifyColorMask0(); | 634 | state_tracker.NotifyColorMask0(); |
| @@ -611,6 +664,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 611 | glDisable(GL_ALPHA_TEST); | 664 | glDisable(GL_ALPHA_TEST); |
| 612 | glDisablei(GL_BLEND, 0); | 665 | glDisablei(GL_BLEND, 0); |
| 613 | glDisablei(GL_SCISSOR_TEST, 0); | 666 | glDisablei(GL_SCISSOR_TEST, 0); |
| 667 | glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); | ||
| 614 | glCullFace(GL_BACK); | 668 | glCullFace(GL_BACK); |
| 615 | glFrontFace(GL_CW); | 669 | glFrontFace(GL_CW); |
| 616 | glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); | 670 | glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); |
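
The has_debug_tool path added above keeps the GPU thread and the presentation thread in lock-step so a frame debugger captures exactly one new frame per present. The same handshake, reduced to a stand-alone sketch (FrameGate and its members are hypothetical names; the logic mirrors DebugNotifyNextFrame/DebugWaitForNextFrame):

    #include <atomic>
    #include <condition_variable>
    #include <mutex>

    class FrameGate {
    public:
        // Producer (GPU thread): announce that one more frame has been released.
        void Notify() {
            ++frame_count;
            std::lock_guard lock{mutex};
            cv.notify_one();
        }

        // Consumer (presentation thread): block until a frame newer than the last seen exists.
        void Wait() {
            const int last_seen = frame_count;
            std::unique_lock lock{mutex};
            cv.wait(lock, [this, last_seen] { return frame_count > last_seen; });
        }

    private:
        std::mutex mutex;
        std::condition_variable cv;
        std::atomic_int frame_count{0};
    };
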
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index df3ac707c..f93447610 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -125,6 +125,7 @@ struct FormatTuple { | |||
| 125 | {vk::Format::eR8Uint, Attachable | Storage}, // R8UI | 125 | {vk::Format::eR8Uint, Attachable | Storage}, // R8UI |
| 126 | {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F | 126 | {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F |
| 127 | {vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U | 127 | {vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U |
| 128 | {vk::Format::eR16G16B16A16Snorm, Attachable | Storage}, // RGBA16S | ||
| 128 | {vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI | 129 | {vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI |
| 129 | {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F | 130 | {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F |
| 130 | {vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI | 131 | {vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI |
| @@ -256,6 +257,8 @@ vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { | |||
| 256 | return vk::ShaderStageFlagBits::eGeometry; | 257 | return vk::ShaderStageFlagBits::eGeometry; |
| 257 | case Tegra::Engines::ShaderType::Fragment: | 258 | case Tegra::Engines::ShaderType::Fragment: |
| 258 | return vk::ShaderStageFlagBits::eFragment; | 259 | return vk::ShaderStageFlagBits::eFragment; |
| 260 | case Tegra::Engines::ShaderType::Compute: | ||
| 261 | return vk::ShaderStageFlagBits::eCompute; | ||
| 259 | } | 262 | } |
| 260 | UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); | 263 | UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); |
| 261 | return {}; | 264 | return {}; |
| @@ -331,6 +334,8 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr | |||
| 331 | return vk::Format::eR16G16B16Unorm; | 334 | return vk::Format::eR16G16B16Unorm; |
| 332 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | 335 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: |
| 333 | return vk::Format::eR16G16B16A16Unorm; | 336 | return vk::Format::eR16G16B16A16Unorm; |
| 337 | case Maxwell::VertexAttribute::Size::Size_10_10_10_2: | ||
| 338 | return vk::Format::eA2B10G10R10UnormPack32; | ||
| 334 | default: | 339 | default: |
| 335 | break; | 340 | break; |
| 336 | } | 341 | } |
| @@ -364,6 +369,10 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr | |||
| 364 | return vk::Format::eR8G8B8A8Uint; | 369 | return vk::Format::eR8G8B8A8Uint; |
| 365 | case Maxwell::VertexAttribute::Size::Size_32: | 370 | case Maxwell::VertexAttribute::Size::Size_32: |
| 366 | return vk::Format::eR32Uint; | 371 | return vk::Format::eR32Uint; |
| 372 | case Maxwell::VertexAttribute::Size::Size_32_32: | ||
| 373 | return vk::Format::eR32G32Uint; | ||
| 374 | case Maxwell::VertexAttribute::Size::Size_32_32_32: | ||
| 375 | return vk::Format::eR32G32B32Uint; | ||
| 367 | case Maxwell::VertexAttribute::Size::Size_32_32_32_32: | 376 | case Maxwell::VertexAttribute::Size::Size_32_32_32_32: |
| 368 | return vk::Format::eR32G32B32A32Uint; | 377 | return vk::Format::eR32G32B32A32Uint; |
| 369 | default: | 378 | default: |
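The new Size_10_10_10_2 case maps an unsigned-normalized 10/10/10/2 vertex attribute to eA2B10G10R10UnormPack32. Packed Vulkan formats are named from most- to least-significant bits, so the two alpha bits occupy the top of the 32-bit word and red the bottom ten bits. A hedged sketch of that packing, with a made-up helper name purely for illustration:

```cpp
#include <cstdint>

// Pack r/g/b (10 bits each) and a (2 bits) into the layout described by
// VK_FORMAT_A2B10G10R10_UNORM_PACK32: A in bits 30-31, B in 20-29, G in 10-19, R in 0-9.
constexpr std::uint32_t Pack_10_10_10_2(std::uint32_t r, std::uint32_t g, std::uint32_t b,
                                        std::uint32_t a) {
    return (a & 0x3u) << 30 | (b & 0x3ffu) << 20 | (g & 0x3ffu) << 10 | (r & 0x3ffu);
}

static_assert(Pack_10_10_10_2(1023, 0, 0, 3) == 0xC00003FFu);
```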
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 886bde3b9..28d2fbc4f 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -107,8 +107,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan | |||
| 107 | features.occlusionQueryPrecise = true; | 107 | features.occlusionQueryPrecise = true; |
| 108 | features.fragmentStoresAndAtomics = true; | 108 | features.fragmentStoresAndAtomics = true; |
| 109 | features.shaderImageGatherExtended = true; | 109 | features.shaderImageGatherExtended = true; |
| 110 | features.shaderStorageImageReadWithoutFormat = | 110 | features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported; |
| 111 | is_shader_storage_img_read_without_format_supported; | ||
| 112 | features.shaderStorageImageWriteWithoutFormat = true; | 111 | features.shaderStorageImageWriteWithoutFormat = true; |
| 113 | features.textureCompressionASTC_LDR = is_optimal_astc_supported; | 112 | features.textureCompressionASTC_LDR = is_optimal_astc_supported; |
| 114 | 113 | ||
| @@ -148,6 +147,15 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan | |||
| 148 | LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); | 147 | LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); |
| 149 | } | 148 | } |
| 150 | 149 | ||
| 150 | vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; | ||
| 151 | if (ext_transform_feedback) { | ||
| 152 | transform_feedback.transformFeedback = true; | ||
| 153 | transform_feedback.geometryStreams = true; | ||
| 154 | SetNext(next, transform_feedback); | ||
| 155 | } else { | ||
| 156 | LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks"); | ||
| 157 | } | ||
| 158 | |||
| 151 | if (!ext_depth_range_unrestricted) { | 159 | if (!ext_depth_range_unrestricted) { |
| 152 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); | 160 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); |
| 153 | } | 161 | } |
| @@ -385,7 +393,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 385 | } | 393 | } |
| 386 | }; | 394 | }; |
| 387 | 395 | ||
| 388 | extensions.reserve(14); | 396 | extensions.reserve(15); |
| 389 | extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); | 397 | extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); |
| 390 | extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); | 398 | extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); |
| 391 | extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); | 399 | extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); |
| @@ -397,18 +405,22 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 397 | 405 | ||
| 398 | [[maybe_unused]] const bool nsight = | 406 | [[maybe_unused]] const bool nsight = |
| 399 | std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); | 407 | std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); |
| 400 | bool khr_shader_float16_int8{}; | 408 | bool has_khr_shader_float16_int8{}; |
| 401 | bool ext_subgroup_size_control{}; | 409 | bool has_ext_subgroup_size_control{}; |
| 410 | bool has_ext_transform_feedback{}; | ||
| 402 | for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { | 411 | for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { |
| 403 | Test(extension, khr_uniform_buffer_standard_layout, | 412 | Test(extension, khr_uniform_buffer_standard_layout, |
| 404 | VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); | 413 | VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); |
| 405 | Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); | 414 | Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, |
| 415 | false); | ||
| 406 | Test(extension, ext_depth_range_unrestricted, | 416 | Test(extension, ext_depth_range_unrestricted, |
| 407 | VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); | 417 | VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); |
| 408 | Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); | 418 | Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); |
| 409 | Test(extension, ext_shader_viewport_index_layer, | 419 | Test(extension, ext_shader_viewport_index_layer, |
| 410 | VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); | 420 | VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true); |
| 411 | Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, | 421 | Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, |
| 422 | false); | ||
| 423 | Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, | ||
| 412 | false); | 424 | false); |
| 413 | if (Settings::values.renderer_debug) { | 425 | if (Settings::values.renderer_debug) { |
| 414 | Test(extension, nv_device_diagnostic_checkpoints, | 426 | Test(extension, nv_device_diagnostic_checkpoints, |
| @@ -416,13 +428,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 416 | } | 428 | } |
| 417 | } | 429 | } |
| 418 | 430 | ||
| 419 | if (khr_shader_float16_int8) { | 431 | if (has_khr_shader_float16_int8) { |
| 420 | is_float16_supported = | 432 | is_float16_supported = |
| 421 | GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; | 433 | GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; |
| 422 | extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); | 434 | extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); |
| 423 | } | 435 | } |
| 424 | 436 | ||
| 425 | if (ext_subgroup_size_control) { | 437 | if (has_ext_subgroup_size_control) { |
| 426 | const auto features = | 438 | const auto features = |
| 427 | GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); | 439 | GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); |
| 428 | const auto properties = | 440 | const auto properties = |
| @@ -439,6 +451,20 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 439 | is_warp_potentially_bigger = true; | 451 | is_warp_potentially_bigger = true; |
| 440 | } | 452 | } |
| 441 | 453 | ||
| 454 | if (has_ext_transform_feedback) { | ||
| 455 | const auto features = | ||
| 456 | GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi); | ||
| 457 | const auto properties = | ||
| 458 | GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi); | ||
| 459 | |||
| 460 | if (features.transformFeedback && features.geometryStreams && | ||
| 461 | properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers && | ||
| 462 | properties.transformFeedbackQueries && properties.transformFeedbackDraw) { | ||
| 463 | extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); | ||
| 464 | ext_transform_feedback = true; | ||
| 465 | } | ||
| 466 | } | ||
| 467 | |||
| 442 | return extensions; | 468 | return extensions; |
| 443 | } | 469 | } |
| 444 | 470 | ||
| @@ -467,8 +493,7 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK | |||
| 467 | 493 | ||
| 468 | void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { | 494 | void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { |
| 469 | const auto supported_features{physical.getFeatures(dldi)}; | 495 | const auto supported_features{physical.getFeatures(dldi)}; |
| 470 | is_shader_storage_img_read_without_format_supported = | 496 | is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; |
| 471 | supported_features.shaderStorageImageReadWithoutFormat; | ||
| 472 | is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); | 497 | is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); |
| 473 | } | 498 | } |
| 474 | 499 | ||
| @@ -510,6 +535,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti | |||
| 510 | vk::Format::eR32G32Sfloat, | 535 | vk::Format::eR32G32Sfloat, |
| 511 | vk::Format::eR32G32Uint, | 536 | vk::Format::eR32G32Uint, |
| 512 | vk::Format::eR16G16B16A16Uint, | 537 | vk::Format::eR16G16B16A16Uint, |
| 538 | vk::Format::eR16G16B16A16Snorm, | ||
| 513 | vk::Format::eR16G16B16A16Unorm, | 539 | vk::Format::eR16G16B16A16Unorm, |
| 514 | vk::Format::eR16G16Unorm, | 540 | vk::Format::eR16G16Unorm, |
| 515 | vk::Format::eR16G16Snorm, | 541 | vk::Format::eR16G16Snorm, |
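The transform feedback detection added here follows the usual Vulkan extension pattern: check that the extension is advertised, query its feature and property structs through a pNext chain, and only enable it when the limits cover what the guest needs. A rough sketch against the plain C API (yuzu itself goes through Vulkan-Hpp and its dynamic dispatcher; the function name below is illustrative and error handling is omitted):

```cpp
#include <vulkan/vulkan.h>

bool QueryTransformFeedback(VkPhysicalDevice physical) {
    VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features{};
    tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;

    VkPhysicalDeviceFeatures2 features2{};
    features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
    features2.pNext = &tfb_features;
    vkGetPhysicalDeviceFeatures2(physical, &features2);

    VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties{};
    tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT;

    VkPhysicalDeviceProperties2 properties2{};
    properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    properties2.pNext = &tfb_properties;
    vkGetPhysicalDeviceProperties2(physical, &properties2);

    // Mirror the checks made in VKDevice::LoadExtensions above.
    return tfb_features.transformFeedback && tfb_features.geometryStreams &&
           tfb_properties.maxTransformFeedbackStreams >= 4 &&
           tfb_properties.maxTransformFeedbackBuffers > 0 &&
           tfb_properties.transformFeedbackQueries && tfb_properties.transformFeedbackDraw;
}
```

When the check passes, the filled feature struct is chained into the device-creation pNext list, which is what the SetNext(next, transform_feedback) call in VKDevice::Create does.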
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 2c27ad730..6e656517f 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h | |||
| @@ -122,11 +122,6 @@ public: | |||
| 122 | return properties.limits.maxPushConstantsSize; | 122 | return properties.limits.maxPushConstantsSize; |
| 123 | } | 123 | } |
| 124 | 124 | ||
| 125 | /// Returns true if Shader storage Image Read Without Format supported. | ||
| 126 | bool IsShaderStorageImageReadWithoutFormatSupported() const { | ||
| 127 | return is_shader_storage_img_read_without_format_supported; | ||
| 128 | } | ||
| 129 | |||
| 130 | /// Returns true if ASTC is natively supported. | 125 | /// Returns true if ASTC is natively supported. |
| 131 | bool IsOptimalAstcSupported() const { | 126 | bool IsOptimalAstcSupported() const { |
| 132 | return is_optimal_astc_supported; | 127 | return is_optimal_astc_supported; |
| @@ -147,6 +142,11 @@ public: | |||
| 147 | return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; | 142 | return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; |
| 148 | } | 143 | } |
| 149 | 144 | ||
| 145 | /// Returns true if formatless image load is supported. | ||
| 146 | bool IsFormatlessImageLoadSupported() const { | ||
| 147 | return is_formatless_image_load_supported; | ||
| 148 | } | ||
| 149 | |||
| 150 | /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. | 150 | /// Returns true if the device supports VK_KHR_uniform_buffer_standard_layout. |
| 151 | bool IsKhrUniformBufferStandardLayoutSupported() const { | 151 | bool IsKhrUniformBufferStandardLayoutSupported() const { |
| 152 | return khr_uniform_buffer_standard_layout; | 152 | return khr_uniform_buffer_standard_layout; |
| @@ -167,6 +167,11 @@ public: | |||
| 167 | return ext_shader_viewport_index_layer; | 167 | return ext_shader_viewport_index_layer; |
| 168 | } | 168 | } |
| 169 | 169 | ||
| 170 | /// Returns true if the device supports VK_EXT_transform_feedback. | ||
| 171 | bool IsExtTransformFeedbackSupported() const { | ||
| 172 | return ext_transform_feedback; | ||
| 173 | } | ||
| 174 | |||
| 170 | /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. | 175 | /// Returns true if the device supports VK_NV_device_diagnostic_checkpoints. |
| 171 | bool IsNvDeviceDiagnosticCheckpoints() const { | 176 | bool IsNvDeviceDiagnosticCheckpoints() const { |
| 172 | return nv_device_diagnostic_checkpoints; | 177 | return nv_device_diagnostic_checkpoints; |
| @@ -214,26 +219,26 @@ private: | |||
| 214 | static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( | 219 | static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( |
| 215 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); | 220 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); |
| 216 | 221 | ||
| 217 | const vk::PhysicalDevice physical; ///< Physical device. | 222 | const vk::PhysicalDevice physical; ///< Physical device. |
| 218 | vk::DispatchLoaderDynamic dld; ///< Device function pointers. | 223 | vk::DispatchLoaderDynamic dld; ///< Device function pointers. |
| 219 | vk::PhysicalDeviceProperties properties; ///< Device properties. | 224 | vk::PhysicalDeviceProperties properties; ///< Device properties. |
| 220 | UniqueDevice logical; ///< Logical device. | 225 | UniqueDevice logical; ///< Logical device. |
| 221 | vk::Queue graphics_queue; ///< Main graphics queue. | 226 | vk::Queue graphics_queue; ///< Main graphics queue. |
| 222 | vk::Queue present_queue; ///< Main present queue. | 227 | vk::Queue present_queue; ///< Main present queue. |
| 223 | u32 graphics_family{}; ///< Main graphics queue family index. | 228 | u32 graphics_family{}; ///< Main graphics queue family index. |
| 224 | u32 present_family{}; ///< Main present queue family index. | 229 | u32 present_family{}; ///< Main present queue family index. |
| 225 | vk::DriverIdKHR driver_id{}; ///< Driver ID. | 230 | vk::DriverIdKHR driver_id{}; ///< Driver ID. |
| 226 | vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. | 231 | vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. |
| 227 | bool is_optimal_astc_supported{}; ///< Support for native ASTC. | 232 | bool is_optimal_astc_supported{}; ///< Support for native ASTC. |
| 228 | bool is_float16_supported{}; ///< Support for float16 arithmetics. | 233 | bool is_float16_supported{}; ///< Support for float16 arithmetics. |
| 229 | bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. | 234 | bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. |
| 235 | bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. | ||
| 230 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. | 236 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. |
| 231 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. | 237 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. |
| 232 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. | 238 | bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. |
| 233 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. | 239 | bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. |
| 240 | bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. | ||
| 234 | bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. | 241 | bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. |
| 235 | bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage | ||
| 236 | ///< image read without format | ||
| 237 | 242 | ||
| 238 | // Telemetry parameters | 243 | // Telemetry parameters |
| 239 | std::string vendor_name; ///< Device's driver name. | 244 | std::string vendor_name; ///< Device's driver name. |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 144e1e007..557b9d662 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -161,8 +161,8 @@ CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stag | |||
| 161 | GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, | 161 | GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, |
| 162 | ProgramCode program_code, u32 main_offset) | 162 | ProgramCode program_code, u32 main_offset) |
| 163 | : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, | 163 | : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, |
| 164 | program_code{std::move(program_code)}, locker{stage, GetEngine(system, stage)}, | 164 | program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)}, |
| 165 | shader_ir{this->program_code, main_offset, compiler_settings, locker}, | 165 | shader_ir{this->program_code, main_offset, compiler_settings, registry}, |
| 166 | entries{GenerateShaderEntries(shader_ir)} {} | 166 | entries{GenerateShaderEntries(shader_ir)} {} |
| 167 | 167 | ||
| 168 | CachedShader::~CachedShader() = default; | 168 | CachedShader::~CachedShader() = default; |
| @@ -179,10 +179,11 @@ Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine( | |||
| 179 | VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, | 179 | VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, |
| 180 | const VKDevice& device, VKScheduler& scheduler, | 180 | const VKDevice& device, VKScheduler& scheduler, |
| 181 | VKDescriptorPool& descriptor_pool, | 181 | VKDescriptorPool& descriptor_pool, |
| 182 | VKUpdateDescriptorQueue& update_descriptor_queue) | 182 | VKUpdateDescriptorQueue& update_descriptor_queue, |
| 183 | VKRenderPassCache& renderpass_cache) | ||
| 183 | : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, | 184 | : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, |
| 184 | descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, | 185 | descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, |
| 185 | renderpass_cache(device) {} | 186 | renderpass_cache{renderpass_cache} {} |
| 186 | 187 | ||
| 187 | VKPipelineCache::~VKPipelineCache() = default; | 188 | VKPipelineCache::~VKPipelineCache() = default; |
| 188 | 189 | ||
| @@ -191,7 +192,6 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | |||
| 191 | 192 | ||
| 192 | std::array<Shader, Maxwell::MaxShaderProgram> shaders; | 193 | std::array<Shader, Maxwell::MaxShaderProgram> shaders; |
| 193 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 194 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 194 | const auto& shader_config = gpu.regs.shader_config[index]; | ||
| 195 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | 195 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; |
| 196 | 196 | ||
| 197 | // Skip stages that are not enabled | 197 | // Skip stages that are not enabled |
| @@ -273,9 +273,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | |||
| 273 | specialization.workgroup_size = key.workgroup_size; | 273 | specialization.workgroup_size = key.workgroup_size; |
| 274 | specialization.shared_memory_size = key.shared_memory_size; | 274 | specialization.shared_memory_size = key.shared_memory_size; |
| 275 | 275 | ||
| 276 | const SPIRVShader spirv_shader{ | 276 | const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute, |
| 277 | Decompile(device, shader->GetIR(), ShaderType::Compute, specialization), | 277 | shader->GetRegistry(), specialization), |
| 278 | shader->GetEntries()}; | 278 | shader->GetEntries()}; |
| 279 | entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, | 279 | entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool, |
| 280 | update_descriptor_queue, spirv_shader); | 280 | update_descriptor_queue, spirv_shader); |
| 281 | return *entry; | 281 | return *entry; |
| @@ -324,8 +324,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 324 | const auto& gpu = system.GPU().Maxwell3D(); | 324 | const auto& gpu = system.GPU().Maxwell3D(); |
| 325 | 325 | ||
| 326 | Specialization specialization; | 326 | Specialization specialization; |
| 327 | specialization.primitive_topology = fixed_state.input_assembly.topology; | 327 | if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) { |
| 328 | if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) { | ||
| 329 | ASSERT(fixed_state.input_assembly.point_size != 0.0f); | 328 | ASSERT(fixed_state.input_assembly.point_size != 0.0f); |
| 330 | specialization.point_size = fixed_state.input_assembly.point_size; | 329 | specialization.point_size = fixed_state.input_assembly.point_size; |
| 331 | } | 330 | } |
| @@ -333,9 +332,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 333 | specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; | 332 | specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; |
| 334 | } | 333 | } |
| 335 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; | 334 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; |
| 336 | specialization.tessellation.primitive = fixed_state.tessellation.primitive; | ||
| 337 | specialization.tessellation.spacing = fixed_state.tessellation.spacing; | ||
| 338 | specialization.tessellation.clockwise = fixed_state.tessellation.clockwise; | ||
| 339 | 335 | ||
| 340 | SPIRVProgram program; | 336 | SPIRVProgram program; |
| 341 | std::vector<vk::DescriptorSetLayoutBinding> bindings; | 337 | std::vector<vk::DescriptorSetLayoutBinding> bindings; |
| @@ -356,8 +352,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 356 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 | 352 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 |
| 357 | const auto program_type = GetShaderType(program_enum); | 353 | const auto program_type = GetShaderType(program_enum); |
| 358 | const auto& entries = shader->GetEntries(); | 354 | const auto& entries = shader->GetEntries(); |
| 359 | program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization), | 355 | program[stage] = { |
| 360 | entries}; | 356 | Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), |
| 357 | entries}; | ||
| 361 | 358 | ||
| 362 | if (program_enum == Maxwell::ShaderProgram::VertexA) { | 359 | if (program_enum == Maxwell::ShaderProgram::VertexA) { |
| 363 | // VertexB was combined with VertexA, so we skip the VertexB iteration | 360 | // VertexB was combined with VertexA, so we skip the VertexB iteration |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 92a670cc7..c4c112290 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -25,7 +25,7 @@ | |||
| 25 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | 25 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" |
| 26 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 26 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 27 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 27 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 28 | #include "video_core/shader/const_buffer_locker.h" | 28 | #include "video_core/shader/registry.h" |
| 29 | #include "video_core/shader/shader_ir.h" | 29 | #include "video_core/shader/shader_ir.h" |
| 30 | #include "video_core/surface.h" | 30 | #include "video_core/surface.h" |
| 31 | 31 | ||
| @@ -132,6 +132,10 @@ public: | |||
| 132 | return shader_ir; | 132 | return shader_ir; |
| 133 | } | 133 | } |
| 134 | 134 | ||
| 135 | const VideoCommon::Shader::Registry& GetRegistry() const { | ||
| 136 | return registry; | ||
| 137 | } | ||
| 138 | |||
| 135 | const VideoCommon::Shader::ShaderIR& GetIR() const { | 139 | const VideoCommon::Shader::ShaderIR& GetIR() const { |
| 136 | return shader_ir; | 140 | return shader_ir; |
| 137 | } | 141 | } |
| @@ -147,7 +151,7 @@ private: | |||
| 147 | GPUVAddr gpu_addr{}; | 151 | GPUVAddr gpu_addr{}; |
| 148 | VAddr cpu_addr{}; | 152 | VAddr cpu_addr{}; |
| 149 | ProgramCode program_code; | 153 | ProgramCode program_code; |
| 150 | VideoCommon::Shader::ConstBufferLocker locker; | 154 | VideoCommon::Shader::Registry registry; |
| 151 | VideoCommon::Shader::ShaderIR shader_ir; | 155 | VideoCommon::Shader::ShaderIR shader_ir; |
| 152 | ShaderEntries entries; | 156 | ShaderEntries entries; |
| 153 | }; | 157 | }; |
| @@ -157,7 +161,8 @@ public: | |||
| 157 | explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, | 161 | explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, |
| 158 | const VKDevice& device, VKScheduler& scheduler, | 162 | const VKDevice& device, VKScheduler& scheduler, |
| 159 | VKDescriptorPool& descriptor_pool, | 163 | VKDescriptorPool& descriptor_pool, |
| 160 | VKUpdateDescriptorQueue& update_descriptor_queue); | 164 | VKUpdateDescriptorQueue& update_descriptor_queue, |
| 165 | VKRenderPassCache& renderpass_cache); | ||
| 161 | ~VKPipelineCache(); | 166 | ~VKPipelineCache(); |
| 162 | 167 | ||
| 163 | std::array<Shader, Maxwell::MaxShaderProgram> GetShaders(); | 168 | std::array<Shader, Maxwell::MaxShaderProgram> GetShaders(); |
| @@ -180,8 +185,7 @@ private: | |||
| 180 | VKScheduler& scheduler; | 185 | VKScheduler& scheduler; |
| 181 | VKDescriptorPool& descriptor_pool; | 186 | VKDescriptorPool& descriptor_pool; |
| 182 | VKUpdateDescriptorQueue& update_descriptor_queue; | 187 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 183 | 188 | VKRenderPassCache& renderpass_cache; | |
| 184 | VKRenderPassCache renderpass_cache; | ||
| 185 | 189 | ||
| 186 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | 190 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; |
| 187 | 191 | ||
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 2bcb17b56..58c69b786 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -287,12 +287,13 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind | |||
| 287 | screen_info{screen_info}, device{device}, resource_manager{resource_manager}, | 287 | screen_info{screen_info}, device{device}, resource_manager{resource_manager}, |
| 288 | memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler}, | 288 | memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler}, |
| 289 | staging_pool(device, memory_manager, scheduler), descriptor_pool(device), | 289 | staging_pool(device, memory_manager, scheduler), descriptor_pool(device), |
| 290 | update_descriptor_queue(device, scheduler), | 290 | update_descriptor_queue(device, scheduler), renderpass_cache(device), |
| 291 | quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | 291 | quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), |
| 292 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | 292 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), |
| 293 | texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, | 293 | texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, |
| 294 | staging_pool), | 294 | staging_pool), |
| 295 | pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), | 295 | pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue, |
| 296 | renderpass_cache), | ||
| 296 | buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), | 297 | buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), |
| 297 | sampler_cache(device), query_cache(system, *this, device, scheduler) { | 298 | sampler_cache(device), query_cache(system, *this, device, scheduler) { |
| 298 | scheduler.SetQueryCache(query_cache); | 299 | scheduler.SetQueryCache(query_cache); |
| @@ -347,6 +348,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 347 | [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); | 348 | [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); |
| 348 | } | 349 | } |
| 349 | 350 | ||
| 351 | BeginTransformFeedback(); | ||
| 352 | |||
| 350 | const auto pipeline_layout = pipeline.GetLayout(); | 353 | const auto pipeline_layout = pipeline.GetLayout(); |
| 351 | const auto descriptor_set = pipeline.CommitDescriptorSet(); | 354 | const auto descriptor_set = pipeline.CommitDescriptorSet(); |
| 352 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { | 355 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { |
| @@ -356,18 +359,23 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 356 | } | 359 | } |
| 357 | draw_params.Draw(cmdbuf, dld); | 360 | draw_params.Draw(cmdbuf, dld); |
| 358 | }); | 361 | }); |
| 362 | |||
| 363 | EndTransformFeedback(); | ||
| 359 | } | 364 | } |
| 360 | 365 | ||
| 361 | void RasterizerVulkan::Clear() { | 366 | void RasterizerVulkan::Clear() { |
| 362 | MICROPROFILE_SCOPE(Vulkan_Clearing); | 367 | MICROPROFILE_SCOPE(Vulkan_Clearing); |
| 363 | 368 | ||
| 364 | query_cache.UpdateCounters(); | ||
| 365 | |||
| 366 | const auto& gpu = system.GPU().Maxwell3D(); | 369 | const auto& gpu = system.GPU().Maxwell3D(); |
| 367 | if (!system.GPU().Maxwell3D().ShouldExecute()) { | 370 | if (!system.GPU().Maxwell3D().ShouldExecute()) { |
| 368 | return; | 371 | return; |
| 369 | } | 372 | } |
| 370 | 373 | ||
| 374 | sampled_views.clear(); | ||
| 375 | image_views.clear(); | ||
| 376 | |||
| 377 | query_cache.UpdateCounters(); | ||
| 378 | |||
| 371 | const auto& regs = gpu.regs; | 379 | const auto& regs = gpu.regs; |
| 372 | const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || | 380 | const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || |
| 373 | regs.clear_buffers.A; | 381 | regs.clear_buffers.A; |
| @@ -376,52 +384,54 @@ void RasterizerVulkan::Clear() { | |||
| 376 | if (!use_color && !use_depth && !use_stencil) { | 384 | if (!use_color && !use_depth && !use_stencil) { |
| 377 | return; | 385 | return; |
| 378 | } | 386 | } |
| 379 | // Clearing images requires to be out of a renderpass | ||
| 380 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 381 | 387 | ||
| 382 | // TODO(Rodrigo): Implement clears rendering a quad or using beginning a renderpass. | 388 | [[maybe_unused]] const auto texceptions = UpdateAttachments(); |
| 389 | DEBUG_ASSERT(texceptions.none()); | ||
| 390 | SetupImageTransitions(0, color_attachments, zeta_attachment); | ||
| 383 | 391 | ||
| 384 | if (use_color) { | 392 | const vk::RenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); |
| 385 | View color_view; | 393 | const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); |
| 386 | { | 394 | scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr}); |
| 387 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); | 395 | |
| 388 | color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false); | 396 | const auto& scissor = regs.scissor_test[0]; |
| 389 | } | 397 | const vk::Offset2D scissor_offset(scissor.min_x, scissor.min_y); |
| 398 | vk::Extent2D scissor_extent{scissor.max_x - scissor.min_x, scissor.max_y - scissor.min_y}; | ||
| 399 | scissor_extent.width = std::min(scissor_extent.width, render_area.width); | ||
| 400 | scissor_extent.height = std::min(scissor_extent.height, render_area.height); | ||
| 390 | 401 | ||
| 391 | color_view->Transition(vk::ImageLayout::eTransferDstOptimal, | 402 | const u32 layer = regs.clear_buffers.layer; |
| 392 | vk::PipelineStageFlagBits::eTransfer, | 403 | const vk::ClearRect clear_rect({scissor_offset, scissor_extent}, layer, 1); |
| 393 | vk::AccessFlagBits::eTransferWrite); | ||
| 394 | 404 | ||
| 405 | if (use_color) { | ||
| 395 | const std::array clear_color = {regs.clear_color[0], regs.clear_color[1], | 406 | const std::array clear_color = {regs.clear_color[0], regs.clear_color[1], |
| 396 | regs.clear_color[2], regs.clear_color[3]}; | 407 | regs.clear_color[2], regs.clear_color[3]}; |
| 397 | const vk::ClearColorValue clear(clear_color); | 408 | const vk::ClearValue clear_value{clear_color}; |
| 398 | scheduler.Record([image = color_view->GetImage(), | 409 | const u32 color_attachment = regs.clear_buffers.RT; |
| 399 | subresource = color_view->GetImageSubresourceRange(), | 410 | scheduler.Record([color_attachment, clear_value, clear_rect](auto cmdbuf, auto& dld) { |
| 400 | clear](auto cmdbuf, auto& dld) { | 411 | const vk::ClearAttachment attachment(vk::ImageAspectFlagBits::eColor, color_attachment, |
| 401 | cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource, | 412 | clear_value); |
| 402 | dld); | 413 | cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld); |
| 403 | }); | 414 | }); |
| 404 | } | 415 | } |
| 405 | if (use_depth || use_stencil) { | ||
| 406 | View zeta_surface; | ||
| 407 | { | ||
| 408 | MICROPROFILE_SCOPE(Vulkan_RenderTargets); | ||
| 409 | zeta_surface = texture_cache.GetDepthBufferSurface(false); | ||
| 410 | } | ||
| 411 | 416 | ||
| 412 | zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal, | 417 | if (!use_depth && !use_stencil) { |
| 413 | vk::PipelineStageFlagBits::eTransfer, | 418 | return; |
| 414 | vk::AccessFlagBits::eTransferWrite); | 419 | } |
| 415 | 420 | vk::ImageAspectFlags aspect_flags; | |
| 416 | const vk::ClearDepthStencilValue clear(regs.clear_depth, | 421 | if (use_depth) { |
| 417 | static_cast<u32>(regs.clear_stencil)); | 422 | aspect_flags |= vk::ImageAspectFlagBits::eDepth; |
| 418 | scheduler.Record([image = zeta_surface->GetImage(), | ||
| 419 | subresource = zeta_surface->GetImageSubresourceRange(), | ||
| 420 | clear](auto cmdbuf, auto& dld) { | ||
| 421 | cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear, | ||
| 422 | subresource, dld); | ||
| 423 | }); | ||
| 424 | } | 423 | } |
| 424 | if (use_stencil) { | ||
| 425 | aspect_flags |= vk::ImageAspectFlagBits::eStencil; | ||
| 426 | } | ||
| 427 | |||
| 428 | scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, | ||
| 429 | clear_rect, aspect_flags](auto cmdbuf, auto& dld) { | ||
| 430 | const vk::ClearDepthStencilValue clear_zeta(clear_depth, clear_stencil); | ||
| 431 | const vk::ClearValue clear_value{clear_zeta}; | ||
| 432 | const vk::ClearAttachment attachment(aspect_flags, 0, clear_value); | ||
| 433 | cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld); | ||
| 434 | }); | ||
| 425 | } | 435 | } |
| 426 | 436 | ||
| 427 | void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | 437 | void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { |
| @@ -538,8 +548,6 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 538 | 548 | ||
| 539 | // Verify that the cached surface is the same size and format as the requested framebuffer | 549 | // Verify that the cached surface is the same size and format as the requested framebuffer |
| 540 | const auto& params{surface->GetSurfaceParams()}; | 550 | const auto& params{surface->GetSurfaceParams()}; |
| 541 | const auto& pixel_format{ | ||
| 542 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; | ||
| 543 | ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); | 551 | ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); |
| 544 | ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); | 552 | ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); |
| 545 | 553 | ||
| @@ -738,6 +746,44 @@ void RasterizerVulkan::UpdateDynamicStates() { | |||
| 738 | UpdateStencilFaces(regs); | 746 | UpdateStencilFaces(regs); |
| 739 | } | 747 | } |
| 740 | 748 | ||
| 749 | void RasterizerVulkan::BeginTransformFeedback() { | ||
| 750 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 751 | if (regs.tfb_enabled == 0) { | ||
| 752 | return; | ||
| 753 | } | ||
| 754 | |||
| 755 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | ||
| 756 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | ||
| 757 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); | ||
| 758 | |||
| 759 | UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable); | ||
| 760 | UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable); | ||
| 761 | UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable); | ||
| 762 | |||
| 763 | const auto& binding = regs.tfb_bindings[0]; | ||
| 764 | UNIMPLEMENTED_IF(binding.buffer_enable == 0); | ||
| 765 | UNIMPLEMENTED_IF(binding.buffer_offset != 0); | ||
| 766 | |||
| 767 | const GPUVAddr gpu_addr = binding.Address(); | ||
| 768 | const std::size_t size = binding.buffer_size; | ||
| 769 | const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | ||
| 770 | |||
| 771 | scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) { | ||
| 772 | cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld); | ||
| 773 | cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld); | ||
| 774 | }); | ||
| 775 | } | ||
| 776 | |||
| 777 | void RasterizerVulkan::EndTransformFeedback() { | ||
| 778 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 779 | if (regs.tfb_enabled == 0) { | ||
| 780 | return; | ||
| 781 | } | ||
| 782 | |||
| 783 | scheduler.Record( | ||
| 784 | [](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); }); | ||
| 785 | } | ||
| 786 | |||
| 741 | void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, | 787 | void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, |
| 742 | BufferBindings& buffer_bindings) { | 788 | BufferBindings& buffer_bindings) { |
| 743 | const auto& regs = system.GPU().Maxwell3D().regs; | 789 | const auto& regs = system.GPU().Maxwell3D().regs; |
| @@ -1109,7 +1155,7 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { | |||
| 1109 | // This implementation assumes that all attributes are used in the shader. | 1155 | // This implementation assumes that all attributes are used in the shader. |
| 1110 | const GPUVAddr start{regs.vertex_array[index].StartAddress()}; | 1156 | const GPUVAddr start{regs.vertex_array[index].StartAddress()}; |
| 1111 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | 1157 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; |
| 1112 | DEBUG_ASSERT(end > start); | 1158 | DEBUG_ASSERT(end >= start); |
| 1113 | 1159 | ||
| 1114 | size += (end - start + 1) * regs.vertex_array[index].enable; | 1160 | size += (end - start + 1) * regs.vertex_array[index].enable; |
| 1115 | } | 1161 | } |
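BeginTransformFeedback/EndTransformFeedback above boil down to three VK_EXT_transform_feedback commands recorded around the draw: bind the destination buffer range, begin capture, and end capture without counter buffers. A minimal sketch, assuming the extension entry points were already fetched with vkGetDeviceProcAddr (the function and parameter names below are illustrative):

```cpp
#include <vulkan/vulkan.h>

void RecordTransformFeedback(VkCommandBuffer cmdbuf, VkBuffer buffer, VkDeviceSize offset,
                             VkDeviceSize size,
                             PFN_vkCmdBindTransformFeedbackBuffersEXT bind,
                             PFN_vkCmdBeginTransformFeedbackEXT begin,
                             PFN_vkCmdEndTransformFeedbackEXT end) {
    // Binding 0 only: the rasterizer currently asserts that tfb_bindings[1..3] are unused.
    bind(cmdbuf, 0, 1, &buffer, &offset, &size);

    // No counter buffers are passed, so capture always starts at the bound offset.
    begin(cmdbuf, 0, 0, nullptr, nullptr);

    // ... record the draw between begin and end ...

    end(cmdbuf, 0, 0, nullptr, nullptr);
}
```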
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 96ea05f0a..3185868e9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -169,6 +169,10 @@ private: | |||
| 169 | 169 | ||
| 170 | void UpdateDynamicStates(); | 170 | void UpdateDynamicStates(); |
| 171 | 171 | ||
| 172 | void BeginTransformFeedback(); | ||
| 173 | |||
| 174 | void EndTransformFeedback(); | ||
| 175 | |||
| 172 | bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); | 176 | bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); |
| 173 | 177 | ||
| 174 | void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, | 178 | void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, |
| @@ -249,6 +253,7 @@ private: | |||
| 249 | VKStagingBufferPool staging_pool; | 253 | VKStagingBufferPool staging_pool; |
| 250 | VKDescriptorPool descriptor_pool; | 254 | VKDescriptorPool descriptor_pool; |
| 251 | VKUpdateDescriptorQueue update_descriptor_queue; | 255 | VKUpdateDescriptorQueue update_descriptor_queue; |
| 256 | VKRenderPassCache renderpass_cache; | ||
| 252 | QuadArrayPass quad_array_pass; | 257 | QuadArrayPass quad_array_pass; |
| 253 | Uint8Pass uint8_pass; | 258 | Uint8Pass uint8_pass; |
| 254 | 259 | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index cfcca5af0..51ecb5567 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -5,7 +5,9 @@ | |||
| 5 | #include <functional> | 5 | #include <functional> |
| 6 | #include <limits> | 6 | #include <limits> |
| 7 | #include <map> | 7 | #include <map> |
| 8 | #include <optional> | ||
| 8 | #include <type_traits> | 9 | #include <type_traits> |
| 10 | #include <unordered_map> | ||
| 9 | #include <utility> | 11 | #include <utility> |
| 10 | 12 | ||
| 11 | #include <fmt/format.h> | 13 | #include <fmt/format.h> |
| @@ -24,6 +26,7 @@ | |||
| 24 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 26 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 25 | #include "video_core/shader/node.h" | 27 | #include "video_core/shader/node.h" |
| 26 | #include "video_core/shader/shader_ir.h" | 28 | #include "video_core/shader/shader_ir.h" |
| 29 | #include "video_core/shader/transform_feedback.h" | ||
| 27 | 30 | ||
| 28 | namespace Vulkan { | 31 | namespace Vulkan { |
| 29 | 32 | ||
| @@ -93,6 +96,12 @@ struct VertexIndices { | |||
| 93 | std::optional<u32> clip_distances; | 96 | std::optional<u32> clip_distances; |
| 94 | }; | 97 | }; |
| 95 | 98 | ||
| 99 | struct GenericVaryingDescription { | ||
| 100 | Id id = nullptr; | ||
| 101 | u32 first_element = 0; | ||
| 102 | bool is_scalar = false; | ||
| 103 | }; | ||
| 104 | |||
| 96 | spv::Dim GetSamplerDim(const Sampler& sampler) { | 105 | spv::Dim GetSamplerDim(const Sampler& sampler) { |
| 97 | ASSERT(!sampler.IsBuffer()); | 106 | ASSERT(!sampler.IsBuffer()); |
| 98 | switch (sampler.GetType()) { | 107 | switch (sampler.GetType()) { |
| @@ -266,9 +275,13 @@ bool IsPrecise(Operation operand) { | |||
| 266 | class SPIRVDecompiler final : public Sirit::Module { | 275 | class SPIRVDecompiler final : public Sirit::Module { |
| 267 | public: | 276 | public: |
| 268 | explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, | 277 | explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage, |
| 269 | const Specialization& specialization) | 278 | const Registry& registry, const Specialization& specialization) |
| 270 | : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()}, | 279 | : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()}, |
| 271 | specialization{specialization} { | 280 | registry{registry}, specialization{specialization} { |
| 281 | if (stage != ShaderType::Compute) { | ||
| 282 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); | ||
| 283 | } | ||
| 284 | |||
| 272 | AddCapability(spv::Capability::Shader); | 285 | AddCapability(spv::Capability::Shader); |
| 273 | AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); | 286 | AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess); |
| 274 | AddCapability(spv::Capability::ImageQuery); | 287 | AddCapability(spv::Capability::ImageQuery); |
| @@ -286,6 +299,15 @@ public: | |||
| 286 | AddExtension("SPV_KHR_variable_pointers"); | 299 | AddExtension("SPV_KHR_variable_pointers"); |
| 287 | AddExtension("SPV_KHR_shader_draw_parameters"); | 300 | AddExtension("SPV_KHR_shader_draw_parameters"); |
| 288 | 301 | ||
| 302 | if (!transform_feedback.empty()) { | ||
| 303 | if (device.IsExtTransformFeedbackSupported()) { | ||
| 304 | AddCapability(spv::Capability::TransformFeedback); | ||
| 305 | } else { | ||
| 306 | LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not " | ||
| 307 | "supported on this device"); | ||
| 308 | } | ||
| 309 | } | ||
| 310 | |||
| 289 | if (ir.UsesLayer() || ir.UsesViewportIndex()) { | 311 | if (ir.UsesLayer() || ir.UsesViewportIndex()) { |
| 290 | if (ir.UsesViewportIndex()) { | 312 | if (ir.UsesViewportIndex()) { |
| 291 | AddCapability(spv::Capability::MultiViewport); | 313 | AddCapability(spv::Capability::MultiViewport); |
| @@ -296,7 +318,7 @@ public: | |||
| 296 | } | 318 | } |
| 297 | } | 319 | } |
| 298 | 320 | ||
| 299 | if (device.IsShaderStorageImageReadWithoutFormatSupported()) { | 321 | if (device.IsFormatlessImageLoadSupported()) { |
| 300 | AddCapability(spv::Capability::StorageImageReadWithoutFormat); | 322 | AddCapability(spv::Capability::StorageImageReadWithoutFormat); |
| 301 | } | 323 | } |
| 302 | 324 | ||
| @@ -318,25 +340,29 @@ public: | |||
| 318 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, | 340 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, |
| 319 | header.common2.threads_per_input_primitive); | 341 | header.common2.threads_per_input_primitive); |
| 320 | break; | 342 | break; |
| 321 | case ShaderType::TesselationEval: | 343 | case ShaderType::TesselationEval: { |
| 344 | const auto& info = registry.GetGraphicsInfo(); | ||
| 322 | AddCapability(spv::Capability::Tessellation); | 345 | AddCapability(spv::Capability::Tessellation); |
| 323 | AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); | 346 | AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces); |
| 324 | AddExecutionMode(main, GetExecutionMode(specialization.tessellation.primitive)); | 347 | AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive)); |
| 325 | AddExecutionMode(main, GetExecutionMode(specialization.tessellation.spacing)); | 348 | AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing)); |
| 326 | AddExecutionMode(main, specialization.tessellation.clockwise | 349 | AddExecutionMode(main, info.tessellation_clockwise |
| 327 | ? spv::ExecutionMode::VertexOrderCw | 350 | ? spv::ExecutionMode::VertexOrderCw |
| 328 | : spv::ExecutionMode::VertexOrderCcw); | 351 | : spv::ExecutionMode::VertexOrderCcw); |
| 329 | break; | 352 | break; |
| 330 | case ShaderType::Geometry: | 353 | } |
| 354 | case ShaderType::Geometry: { | ||
| 355 | const auto& info = registry.GetGraphicsInfo(); | ||
| 331 | AddCapability(spv::Capability::Geometry); | 356 | AddCapability(spv::Capability::Geometry); |
| 332 | AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); | 357 | AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces); |
| 333 | AddExecutionMode(main, GetExecutionMode(specialization.primitive_topology)); | 358 | AddExecutionMode(main, GetExecutionMode(info.primitive_topology)); |
| 334 | AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); | 359 | AddExecutionMode(main, GetExecutionMode(header.common3.output_topology)); |
| 335 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, | 360 | AddExecutionMode(main, spv::ExecutionMode::OutputVertices, |
| 336 | header.common4.max_output_vertices); | 361 | header.common4.max_output_vertices); |
| 337 | // TODO(Rodrigo): Where can we get this info from? | 362 | // TODO(Rodrigo): Where can we get this info from? |
| 338 | AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); | 363 | AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U); |
| 339 | break; | 364 | break; |
| 365 | } | ||
| 340 | case ShaderType::Fragment: | 366 | case ShaderType::Fragment: |
| 341 | AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); | 367 | AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces); |
| 342 | AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); | 368 | AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); |
| @@ -545,7 +571,8 @@ private: | |||
| 545 | if (stage != ShaderType::Geometry) { | 571 | if (stage != ShaderType::Geometry) { |
| 546 | return; | 572 | return; |
| 547 | } | 573 | } |
| 548 | const u32 num_input = GetNumPrimitiveTopologyVertices(specialization.primitive_topology); | 574 | const auto& info = registry.GetGraphicsInfo(); |
| 575 | const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology); | ||
| 549 | DeclareInputVertexArray(num_input); | 576 | DeclareInputVertexArray(num_input); |
| 550 | DeclareOutputVertex(); | 577 | DeclareOutputVertex(); |
| 551 | } | 578 | } |
| @@ -742,12 +769,34 @@ private: | |||
| 742 | } | 769 | } |
| 743 | 770 | ||
| 744 | void DeclareOutputAttributes() { | 771 | void DeclareOutputAttributes() { |
| 772 | if (stage == ShaderType::Compute || stage == ShaderType::Fragment) { | ||
| 773 | return; | ||
| 774 | } | ||
| 775 | |||
| 776 | UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex); | ||
| 745 | for (const auto index : ir.GetOutputAttributes()) { | 777 | for (const auto index : ir.GetOutputAttributes()) { |
| 746 | if (!IsGenericAttribute(index)) { | 778 | if (!IsGenericAttribute(index)) { |
| 747 | continue; | 779 | continue; |
| 748 | } | 780 | } |
| 749 | const u32 location = GetGenericAttributeLocation(index); | 781 | DeclareOutputAttribute(index); |
| 750 | Id type = t_float4; | 782 | } |
| 783 | } | ||
| 784 | |||
| 785 | void DeclareOutputAttribute(Attribute::Index index) { | ||
| 786 | static constexpr std::string_view swizzle = "xyzw"; | ||
| 787 | |||
| 788 | const u32 location = GetGenericAttributeLocation(index); | ||
| 789 | u8 element = 0; | ||
| 790 | while (element < 4) { | ||
| 791 | const std::size_t remainder = 4 - element; | ||
| 792 | |||
| 793 | std::size_t num_components = remainder; | ||
| 794 | const std::optional tfb = GetTransformFeedbackInfo(index, element); | ||
| 795 | if (tfb) { | ||
| 796 | num_components = tfb->components; | ||
| 797 | } | ||
| 798 | |||
| 799 | Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1); | ||
| 751 | Id varying_default = v_varying_default; | 800 | Id varying_default = v_varying_default; |
| 752 | if (IsOutputAttributeArray()) { | 801 | if (IsOutputAttributeArray()) { |
| 753 | const u32 num = GetNumOutputVertices(); | 802 | const u32 num = GetNumOutputVertices(); |
| @@ -760,13 +809,45 @@ private: | |||
| 760 | } | 809 | } |
| 761 | type = TypePointer(spv::StorageClass::Output, type); | 810 | type = TypePointer(spv::StorageClass::Output, type); |
| 762 | 811 | ||
| 812 | std::string name = fmt::format("out_attr{}", location); | ||
| 813 | if (num_components < 4 || element > 0) { | ||
| 814 | name = fmt::format("{}_{}", name, swizzle.substr(element, num_components)); | ||
| 815 | } | ||
| 816 | |||
| 763 | const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); | 817 | const Id id = OpVariable(type, spv::StorageClass::Output, varying_default); |
| 764 | Name(AddGlobalVariable(id), fmt::format("out_attr{}", location)); | 818 | Name(AddGlobalVariable(id), name); |
| 765 | output_attributes.emplace(index, id); | 819 | |
| 820 | GenericVaryingDescription description; | ||
| 821 | description.id = id; | ||
| 822 | description.first_element = element; | ||
| 823 | description.is_scalar = num_components == 1; | ||
| 824 | for (u32 i = 0; i < num_components; ++i) { | ||
| 825 | const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i); | ||
| 826 | output_attributes.emplace(offset, description); | ||
| 827 | } | ||
| 766 | interfaces.push_back(id); | 828 | interfaces.push_back(id); |
| 767 | 829 | ||
| 768 | Decorate(id, spv::Decoration::Location, location); | 830 | Decorate(id, spv::Decoration::Location, location); |
| 831 | if (element > 0) { | ||
| 832 | Decorate(id, spv::Decoration::Component, static_cast<u32>(element)); | ||
| 833 | } | ||
| 834 | if (tfb && device.IsExtTransformFeedbackSupported()) { | ||
| 835 | Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer)); | ||
| 836 | Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride)); | ||
| 837 | Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset)); | ||
| 838 | } | ||
| 839 | |||
| 840 | element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); | ||
| 841 | } | ||
| 842 | } | ||
| 843 | |||
| 844 | std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) { | ||
| 845 | const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); | ||
| 846 | const auto it = transform_feedback.find(location); | ||
| 847 | if (it == transform_feedback.end()) { | ||
| 848 | return {}; | ||
| 769 | } | 849 | } |
| 850 | return it->second; | ||
| 770 | } | 851 | } |
| 771 | 852 | ||
| 772 | u32 DeclareConstantBuffers(u32 binding) { | 853 | u32 DeclareConstantBuffers(u32 binding) { |
| @@ -898,7 +979,7 @@ private: | |||
| 898 | u32 GetNumInputVertices() const { | 979 | u32 GetNumInputVertices() const { |
| 899 | switch (stage) { | 980 | switch (stage) { |
| 900 | case ShaderType::Geometry: | 981 | case ShaderType::Geometry: |
| 901 | return GetNumPrimitiveTopologyVertices(specialization.primitive_topology); | 982 | return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology); |
| 902 | case ShaderType::TesselationControl: | 983 | case ShaderType::TesselationControl: |
| 903 | case ShaderType::TesselationEval: | 984 | case ShaderType::TesselationEval: |
| 904 | return NumInputPatches; | 985 | return NumInputPatches; |
| @@ -1346,8 +1427,14 @@ private: | |||
| 1346 | } | 1427 | } |
| 1347 | default: | 1428 | default: |
| 1348 | if (IsGenericAttribute(attribute)) { | 1429 | if (IsGenericAttribute(attribute)) { |
| 1349 | const Id composite = output_attributes.at(attribute); | 1430 | const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element); |
| 1350 | return {ArrayPass(t_out_float, composite, {element}), Type::Float}; | 1431 | const GenericVaryingDescription description = output_attributes.at(offset); |
| 1432 | const Id composite = description.id; | ||
| 1433 | std::vector<u32> indices; | ||
| 1434 | if (!description.is_scalar) { | ||
| 1435 | indices.push_back(element - description.first_element); | ||
| 1436 | } | ||
| 1437 | return {ArrayPass(t_out_float, composite, indices), Type::Float}; | ||
| 1351 | } | 1438 | } |
| 1352 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", | 1439 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", |
| 1353 | static_cast<u32>(attribute)); | 1440 | static_cast<u32>(attribute)); |
| @@ -1793,7 +1880,7 @@ private: | |||
| 1793 | } | 1880 | } |
| 1794 | 1881 | ||
| 1795 | Expression ImageLoad(Operation operation) { | 1882 | Expression ImageLoad(Operation operation) { |
| 1796 | if (!device.IsShaderStorageImageReadWithoutFormatSupported()) { | 1883 | if (!device.IsFormatlessImageLoadSupported()) { |
| 1797 | return {v_float_zero, Type::Float}; | 1884 | return {v_float_zero, Type::Float}; |
| 1798 | } | 1885 | } |
| 1799 | 1886 | ||
| @@ -2258,11 +2345,11 @@ private: | |||
| 2258 | std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { | 2345 | std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const { |
| 2259 | switch (type) { | 2346 | switch (type) { |
| 2260 | case Type::Float: | 2347 | case Type::Float: |
| 2261 | return {nullptr, t_float2, t_float3, t_float4}; | 2348 | return {t_float, t_float2, t_float3, t_float4}; |
| 2262 | case Type::Int: | 2349 | case Type::Int: |
| 2263 | return {nullptr, t_int2, t_int3, t_int4}; | 2350 | return {t_int, t_int2, t_int3, t_int4}; |
| 2264 | case Type::Uint: | 2351 | case Type::Uint: |
| 2265 | return {nullptr, t_uint2, t_uint3, t_uint4}; | 2352 | return {t_uint, t_uint2, t_uint3, t_uint4}; |
| 2266 | default: | 2353 | default: |
| 2267 | UNIMPLEMENTED(); | 2354 | UNIMPLEMENTED(); |
| 2268 | return {}; | 2355 | return {}; |
| @@ -2495,7 +2582,9 @@ private: | |||
| 2495 | const ShaderIR& ir; | 2582 | const ShaderIR& ir; |
| 2496 | const ShaderType stage; | 2583 | const ShaderType stage; |
| 2497 | const Tegra::Shader::Header header; | 2584 | const Tegra::Shader::Header header; |
| 2585 | const Registry& registry; | ||
| 2498 | const Specialization& specialization; | 2586 | const Specialization& specialization; |
| 2587 | std::unordered_map<u8, VaryingTFB> transform_feedback; | ||
| 2499 | 2588 | ||
| 2500 | const Id t_void = Name(TypeVoid(), "void"); | 2589 | const Id t_void = Name(TypeVoid(), "void"); |
| 2501 | 2590 | ||
| @@ -2584,7 +2673,7 @@ private: | |||
| 2584 | Id shared_memory{}; | 2673 | Id shared_memory{}; |
| 2585 | std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; | 2674 | std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{}; |
| 2586 | std::map<Attribute::Index, Id> input_attributes; | 2675 | std::map<Attribute::Index, Id> input_attributes; |
| 2587 | std::map<Attribute::Index, Id> output_attributes; | 2676 | std::unordered_map<u8, GenericVaryingDescription> output_attributes; |
| 2588 | std::map<u32, Id> constant_buffers; | 2677 | std::map<u32, Id> constant_buffers; |
| 2589 | std::map<GlobalMemoryBase, Id> global_buffers; | 2678 | std::map<GlobalMemoryBase, Id> global_buffers; |
| 2590 | std::map<u32, TexelBuffer> texel_buffers; | 2679 | std::map<u32, TexelBuffer> texel_buffers; |
| @@ -2870,8 +2959,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 2870 | } | 2959 | } |
| 2871 | 2960 | ||
| 2872 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, | 2961 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, |
| 2873 | ShaderType stage, const Specialization& specialization) { | 2962 | ShaderType stage, const VideoCommon::Shader::Registry& registry, |
| 2874 | return SPIRVDecompiler(device, ir, stage, specialization).Assemble(); | 2963 | const Specialization& specialization) { |
| 2964 | return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble(); | ||
| 2875 | } | 2965 | } |
| 2876 | 2966 | ||
| 2877 | } // namespace Vulkan | 2967 | } // namespace Vulkan |
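A note on the generic varying bookkeeping introduced above: output attributes are now tracked per component, keyed by index * 4 + element, so partially written varyings can be declared with explicit Component decorations and, when transform feedback is active, with XfbBuffer/XfbStride/Offset decorations. The sketch below mirrors only the offset math and the lookup adjustment (element - first_element) outside the decompiler; the struct and the values in it are illustrative, not code from this repository.

    #include <cstdint>
    #include <cstdio>
    #include <unordered_map>

    // Hypothetical mirror of the per-component description map kept by the decompiler.
    struct Description {
        int id;                     // stands in for the SPIR-V variable Id
        std::uint8_t first_element; // first component covered by the variable
        bool is_scalar;             // single-component variables need no access index
    };

    int main() {
        std::unordered_map<std::uint8_t, Description> output_attributes;

        // Declare a two-component varying on generic attribute 3 starting at element 2
        // (think "out_attr3_zw"); every covered component maps to the same description.
        const std::uint32_t index = 3;
        const std::uint8_t element = 2;
        const std::uint32_t num_components = 2;
        const Description desc{42, element, num_components == 1};
        for (std::uint32_t i = 0; i < num_components; ++i) {
            const auto offset = static_cast<std::uint8_t>(index * 4 + element + i);
            output_attributes.emplace(offset, desc);
        }

        // Look up component 'w' (element 3) of attribute 3, as the store path now does.
        const auto offset = static_cast<std::uint8_t>(index * 4 + 3);
        const Description& found = output_attributes.at(offset);
        const int access_index = found.is_scalar ? -1 : 3 - found.first_element;
        std::printf("variable id=%d, composite index=%d\n", found.id, access_index);
        return 0;
    }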
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index f5dc14d9e..ffea4709e 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "video_core/engines/maxwell_3d.h" | 16 | #include "video_core/engines/maxwell_3d.h" |
| 17 | #include "video_core/engines/shader_type.h" | 17 | #include "video_core/engines/shader_type.h" |
| 18 | #include "video_core/shader/registry.h" | ||
| 18 | #include "video_core/shader/shader_ir.h" | 19 | #include "video_core/shader/shader_ir.h" |
| 19 | 20 | ||
| 20 | namespace Vulkan { | 21 | namespace Vulkan { |
| @@ -91,17 +92,9 @@ struct Specialization final { | |||
| 91 | u32 shared_memory_size{}; | 92 | u32 shared_memory_size{}; |
| 92 | 93 | ||
| 93 | // Graphics specific | 94 | // Graphics specific |
| 94 | Maxwell::PrimitiveTopology primitive_topology{}; | ||
| 95 | std::optional<float> point_size{}; | 95 | std::optional<float> point_size{}; |
| 96 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; | 96 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; |
| 97 | bool ndc_minus_one_to_one{}; | 97 | bool ndc_minus_one_to_one{}; |
| 98 | |||
| 99 | // Tessellation specific | ||
| 100 | struct { | ||
| 101 | Maxwell::TessellationPrimitive primitive{}; | ||
| 102 | Maxwell::TessellationSpacing spacing{}; | ||
| 103 | bool clockwise{}; | ||
| 104 | } tessellation; | ||
| 105 | }; | 98 | }; |
| 106 | // Old gcc versions don't consider this trivially copyable. | 99 | // Old gcc versions don't consider this trivially copyable. |
| 107 | // static_assert(std::is_trivially_copyable_v<Specialization>); | 100 | // static_assert(std::is_trivially_copyable_v<Specialization>); |
| @@ -114,6 +107,8 @@ struct SPIRVShader { | |||
| 114 | ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); | 107 | ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir); |
| 115 | 108 | ||
| 116 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, | 109 | std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, |
| 117 | Tegra::Engines::ShaderType stage, const Specialization& specialization); | 110 | Tegra::Engines::ShaderType stage, |
| 111 | const VideoCommon::Shader::Registry& registry, | ||
| 112 | const Specialization& specialization); | ||
| 118 | 113 | ||
| 119 | } // namespace Vulkan | 114 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index d9ea3cc21..374959f82 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | |||
| @@ -100,7 +100,6 @@ void VKStagingBufferPool::ReleaseCache(bool host_visible) { | |||
| 100 | } | 100 | } |
| 101 | 101 | ||
| 102 | u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) { | 102 | u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) { |
| 103 | static constexpr u64 epochs_to_destroy = 180; | ||
| 104 | static constexpr std::size_t deletions_per_tick = 16; | 103 | static constexpr std::size_t deletions_per_tick = 16; |
| 105 | 104 | ||
| 106 | auto& staging = cache[log2]; | 105 | auto& staging = cache[log2]; |
| @@ -108,6 +107,7 @@ u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t lo | |||
| 108 | const std::size_t old_size = entries.size(); | 107 | const std::size_t old_size = entries.size(); |
| 109 | 108 | ||
| 110 | const auto is_deleteable = [this](const auto& entry) { | 109 | const auto is_deleteable = [this](const auto& entry) { |
| 110 | static constexpr u64 epochs_to_destroy = 180; | ||
| 111 | return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed(); | 111 | return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed(); |
| 112 | }; | 112 | }; |
| 113 | const std::size_t begin_offset = staging.delete_index; | 113 | const std::size_t begin_offset = staging.delete_index; |
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index d74e68b63..94a89e388 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp | |||
| @@ -90,8 +90,6 @@ void StateTracker::Initialize() { | |||
| 90 | SetupDirtyBlendConstants(tables); | 90 | SetupDirtyBlendConstants(tables); |
| 91 | SetupDirtyDepthBounds(tables); | 91 | SetupDirtyDepthBounds(tables); |
| 92 | SetupDirtyStencilProperties(tables); | 92 | SetupDirtyStencilProperties(tables); |
| 93 | |||
| 94 | SetupCommonOnWriteStores(dirty.on_write_stores); | ||
| 95 | } | 93 | } |
| 96 | 94 | ||
| 97 | void StateTracker::InvalidateCommandBufferState() { | 95 | void StateTracker::InvalidateCommandBufferState() { |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 73d92a5ae..26175921b 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -52,6 +52,9 @@ vk::ImageType SurfaceTargetToImage(SurfaceTarget target) { | |||
| 52 | return vk::ImageType::e2D; | 52 | return vk::ImageType::e2D; |
| 53 | case SurfaceTarget::Texture3D: | 53 | case SurfaceTarget::Texture3D: |
| 54 | return vk::ImageType::e3D; | 54 | return vk::ImageType::e3D; |
| 55 | case SurfaceTarget::TextureBuffer: | ||
| 56 | UNREACHABLE(); | ||
| 57 | return {}; | ||
| 55 | } | 58 | } |
| 56 | UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target)); | 59 | UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target)); |
| 57 | return {}; | 60 | return {}; |
| @@ -273,7 +276,6 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { | |||
| 273 | 276 | ||
| 274 | for (u32 level = 0; level < params.num_levels; ++level) { | 277 | for (u32 level = 0; level < params.num_levels; ++level) { |
| 275 | vk::BufferImageCopy copy = GetBufferImageCopy(level); | 278 | vk::BufferImageCopy copy = GetBufferImageCopy(level); |
| 276 | const auto& dld = device.GetDispatchLoader(); | ||
| 277 | if (image->GetAspectMask() == | 279 | if (image->GetAspectMask() == |
| 278 | (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) { | 280 | (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) { |
| 279 | vk::BufferImageCopy depth = copy; | 281 | vk::BufferImageCopy depth = copy; |
| @@ -422,7 +424,6 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, | |||
| 422 | dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer, | 424 | dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer, |
| 423 | vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal); | 425 | vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal); |
| 424 | 426 | ||
| 425 | const auto& dld{device.GetDispatchLoader()}; | ||
| 426 | const vk::ImageSubresourceLayers src_subresource( | 427 | const vk::ImageSubresourceLayers src_subresource( |
| 427 | src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers); | 428 | src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers); |
| 428 | const vk::ImageSubresourceLayers dst_subresource( | 429 | const vk::ImageSubresourceLayers dst_subresource( |
| @@ -458,7 +459,6 @@ void VKTextureCache::ImageBlit(View& src_view, View& dst_view, | |||
| 458 | dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right}); | 459 | dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right}); |
| 459 | const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; | 460 | const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; |
| 460 | 461 | ||
| 461 | const auto& dld{device.GetDispatchLoader()}; | ||
| 462 | scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit, | 462 | scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit, |
| 463 | is_linear](auto cmdbuf, auto& dld) { | 463 | is_linear](auto cmdbuf, auto& dld) { |
| 464 | cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, | 464 | cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, |
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp deleted file mode 100644 index 0638be8cb..000000000 --- a/src/video_core/shader/const_buffer_locker.cpp +++ /dev/null | |||
| @@ -1,126 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <tuple> | ||
| 7 | |||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 10 | #include "video_core/engines/shader_type.h" | ||
| 11 | #include "video_core/shader/const_buffer_locker.h" | ||
| 12 | |||
| 13 | namespace VideoCommon::Shader { | ||
| 14 | |||
| 15 | using Tegra::Engines::SamplerDescriptor; | ||
| 16 | |||
| 17 | ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) | ||
| 18 | : stage{shader_stage} {} | ||
| 19 | |||
| 20 | ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, | ||
| 21 | Tegra::Engines::ConstBufferEngineInterface& engine) | ||
| 22 | : stage{shader_stage}, engine{&engine} {} | ||
| 23 | |||
| 24 | ConstBufferLocker::~ConstBufferLocker() = default; | ||
| 25 | |||
| 26 | std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { | ||
| 27 | const std::pair<u32, u32> key = {buffer, offset}; | ||
| 28 | const auto iter = keys.find(key); | ||
| 29 | if (iter != keys.end()) { | ||
| 30 | return iter->second; | ||
| 31 | } | ||
| 32 | if (!engine) { | ||
| 33 | return std::nullopt; | ||
| 34 | } | ||
| 35 | const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); | ||
| 36 | keys.emplace(key, value); | ||
| 37 | return value; | ||
| 38 | } | ||
| 39 | |||
| 40 | std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) { | ||
| 41 | const u32 key = offset; | ||
| 42 | const auto iter = bound_samplers.find(key); | ||
| 43 | if (iter != bound_samplers.end()) { | ||
| 44 | return iter->second; | ||
| 45 | } | ||
| 46 | if (!engine) { | ||
| 47 | return std::nullopt; | ||
| 48 | } | ||
| 49 | const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); | ||
| 50 | bound_samplers.emplace(key, value); | ||
| 51 | return value; | ||
| 52 | } | ||
| 53 | |||
| 54 | std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler( | ||
| 55 | u32 buffer, u32 offset) { | ||
| 56 | const std::pair key = {buffer, offset}; | ||
| 57 | const auto iter = bindless_samplers.find(key); | ||
| 58 | if (iter != bindless_samplers.end()) { | ||
| 59 | return iter->second; | ||
| 60 | } | ||
| 61 | if (!engine) { | ||
| 62 | return std::nullopt; | ||
| 63 | } | ||
| 64 | const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); | ||
| 65 | bindless_samplers.emplace(key, value); | ||
| 66 | return value; | ||
| 67 | } | ||
| 68 | |||
| 69 | std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() { | ||
| 70 | if (bound_buffer_saved) { | ||
| 71 | return bound_buffer; | ||
| 72 | } | ||
| 73 | if (!engine) { | ||
| 74 | return std::nullopt; | ||
| 75 | } | ||
| 76 | bound_buffer_saved = true; | ||
| 77 | bound_buffer = engine->GetBoundBuffer(); | ||
| 78 | return bound_buffer; | ||
| 79 | } | ||
| 80 | |||
| 81 | void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { | ||
| 82 | keys.insert_or_assign({buffer, offset}, value); | ||
| 83 | } | ||
| 84 | |||
| 85 | void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { | ||
| 86 | bound_samplers.insert_or_assign(offset, sampler); | ||
| 87 | } | ||
| 88 | |||
| 89 | void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { | ||
| 90 | bindless_samplers.insert_or_assign({buffer, offset}, sampler); | ||
| 91 | } | ||
| 92 | |||
| 93 | void ConstBufferLocker::SetBoundBuffer(u32 buffer) { | ||
| 94 | bound_buffer_saved = true; | ||
| 95 | bound_buffer = buffer; | ||
| 96 | } | ||
| 97 | |||
| 98 | bool ConstBufferLocker::IsConsistent() const { | ||
| 99 | if (!engine) { | ||
| 100 | return false; | ||
| 101 | } | ||
| 102 | return std::all_of(keys.begin(), keys.end(), | ||
| 103 | [this](const auto& pair) { | ||
| 104 | const auto [cbuf, offset] = pair.first; | ||
| 105 | const auto value = pair.second; | ||
| 106 | return value == engine->AccessConstBuffer32(stage, cbuf, offset); | ||
| 107 | }) && | ||
| 108 | std::all_of(bound_samplers.begin(), bound_samplers.end(), | ||
| 109 | [this](const auto& sampler) { | ||
| 110 | const auto [key, value] = sampler; | ||
| 111 | return value == engine->AccessBoundSampler(stage, key); | ||
| 112 | }) && | ||
| 113 | std::all_of(bindless_samplers.begin(), bindless_samplers.end(), | ||
| 114 | [this](const auto& sampler) { | ||
| 115 | const auto [cbuf, offset] = sampler.first; | ||
| 116 | const auto value = sampler.second; | ||
| 117 | return value == engine->AccessBindlessSampler(stage, cbuf, offset); | ||
| 118 | }); | ||
| 119 | } | ||
| 120 | |||
| 121 | bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const { | ||
| 122 | return std::tie(keys, bound_samplers, bindless_samplers) == | ||
| 123 | std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); | ||
| 124 | } | ||
| 125 | |||
| 126 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h deleted file mode 100644 index d3ea11087..000000000 --- a/src/video_core/shader/const_buffer_locker.h +++ /dev/null | |||
| @@ -1,103 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <optional> | ||
| 8 | #include <unordered_map> | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "common/hash.h" | ||
| 11 | #include "video_core/engines/const_buffer_engine_interface.h" | ||
| 12 | #include "video_core/engines/shader_type.h" | ||
| 13 | #include "video_core/guest_driver.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; | ||
| 18 | using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; | ||
| 19 | using BindlessSamplerMap = | ||
| 20 | std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; | ||
| 21 | |||
| 22 | /** | ||
| 23 | * The ConstBufferLocker is a class use to interface the 3D and compute engines with the shader | ||
| 24 | * compiler. with it, the shader can obtain required data from GPU state and store it for disk | ||
| 25 | * shader compilation. | ||
| 26 | */ | ||
| 27 | class ConstBufferLocker { | ||
| 28 | public: | ||
| 29 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); | ||
| 30 | |||
| 31 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, | ||
| 32 | Tegra::Engines::ConstBufferEngineInterface& engine); | ||
| 33 | |||
| 34 | ~ConstBufferLocker(); | ||
| 35 | |||
| 36 | /// Retrieves a key from the locker, if it's registered, it will give the registered value, if | ||
| 37 | /// not it will obtain it from maxwell3d and register it. | ||
| 38 | std::optional<u32> ObtainKey(u32 buffer, u32 offset); | ||
| 39 | |||
| 40 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); | ||
| 41 | |||
| 42 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); | ||
| 43 | |||
| 44 | std::optional<u32> ObtainBoundBuffer(); | ||
| 45 | |||
| 46 | /// Inserts a key. | ||
| 47 | void InsertKey(u32 buffer, u32 offset, u32 value); | ||
| 48 | |||
| 49 | /// Inserts a bound sampler key. | ||
| 50 | void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 51 | |||
| 52 | /// Inserts a bindless sampler key. | ||
| 53 | void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 54 | |||
| 55 | /// Set the bound buffer for this locker. | ||
| 56 | void SetBoundBuffer(u32 buffer); | ||
| 57 | |||
| 58 | /// Checks keys and samplers against engine's current const buffers. Returns true if they are | ||
| 59 | /// the same value, false otherwise; | ||
| 60 | bool IsConsistent() const; | ||
| 61 | |||
| 62 | /// Returns true if the keys are equal to the other ones in the locker. | ||
| 63 | bool HasEqualKeys(const ConstBufferLocker& rhs) const; | ||
| 64 | |||
| 65 | /// Gives an getter to the const buffer keys in the database. | ||
| 66 | const KeyMap& GetKeys() const { | ||
| 67 | return keys; | ||
| 68 | } | ||
| 69 | |||
| 70 | /// Gets samplers database. | ||
| 71 | const BoundSamplerMap& GetBoundSamplers() const { | ||
| 72 | return bound_samplers; | ||
| 73 | } | ||
| 74 | |||
| 75 | /// Gets bindless samplers database. | ||
| 76 | const BindlessSamplerMap& GetBindlessSamplers() const { | ||
| 77 | return bindless_samplers; | ||
| 78 | } | ||
| 79 | |||
| 80 | /// Gets bound buffer used on this shader | ||
| 81 | u32 GetBoundBuffer() const { | ||
| 82 | return bound_buffer; | ||
| 83 | } | ||
| 84 | |||
| 85 | /// Obtains access to the guest driver's profile. | ||
| 86 | VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const { | ||
| 87 | if (engine) { | ||
| 88 | return &engine->AccessGuestDriverProfile(); | ||
| 89 | } | ||
| 90 | return nullptr; | ||
| 91 | } | ||
| 92 | |||
| 93 | private: | ||
| 94 | const Tegra::Engines::ShaderType stage; | ||
| 95 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; | ||
| 96 | KeyMap keys; | ||
| 97 | BoundSamplerMap bound_samplers; | ||
| 98 | BindlessSamplerMap bindless_samplers; | ||
| 99 | bool bound_buffer_saved{}; | ||
| 100 | u32 bound_buffer{}; | ||
| 101 | }; | ||
| 102 | |||
| 103 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 0229733b6..2e2711350 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 14 | #include "video_core/shader/ast.h" | 14 | #include "video_core/shader/ast.h" |
| 15 | #include "video_core/shader/control_flow.h" | 15 | #include "video_core/shader/control_flow.h" |
| 16 | #include "video_core/shader/registry.h" | ||
| 16 | #include "video_core/shader/shader_ir.h" | 17 | #include "video_core/shader/shader_ir.h" |
| 17 | 18 | ||
| 18 | namespace VideoCommon::Shader { | 19 | namespace VideoCommon::Shader { |
| @@ -64,11 +65,11 @@ struct BlockInfo { | |||
| 64 | }; | 65 | }; |
| 65 | 66 | ||
| 66 | struct CFGRebuildState { | 67 | struct CFGRebuildState { |
| 67 | explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) | 68 | explicit CFGRebuildState(const ProgramCode& program_code, u32 start, Registry& registry) |
| 68 | : program_code{program_code}, locker{locker}, start{start} {} | 69 | : program_code{program_code}, registry{registry}, start{start} {} |
| 69 | 70 | ||
| 70 | const ProgramCode& program_code; | 71 | const ProgramCode& program_code; |
| 71 | ConstBufferLocker& locker; | 72 | Registry& registry; |
| 72 | u32 start{}; | 73 | u32 start{}; |
| 73 | std::vector<BlockInfo> block_info; | 74 | std::vector<BlockInfo> block_info; |
| 74 | std::list<u32> inspect_queries; | 75 | std::list<u32> inspect_queries; |
| @@ -438,7 +439,7 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) | |||
| 438 | const s32 pc_target = offset + result.relative_position; | 439 | const s32 pc_target = offset + result.relative_position; |
| 439 | std::vector<CaseBranch> branches; | 440 | std::vector<CaseBranch> branches; |
| 440 | for (u32 i = 0; i < result.entries; i++) { | 441 | for (u32 i = 0; i < result.entries; i++) { |
| 441 | auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4); | 442 | auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4); |
| 442 | if (!key) { | 443 | if (!key) { |
| 443 | return {ParseResult::AbnormalFlow, parse_info}; | 444 | return {ParseResult::AbnormalFlow, parse_info}; |
| 444 | } | 445 | } |
| @@ -656,14 +657,14 @@ void DecompileShader(CFGRebuildState& state) { | |||
| 656 | 657 | ||
| 657 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, | 658 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, |
| 658 | const CompilerSettings& settings, | 659 | const CompilerSettings& settings, |
| 659 | ConstBufferLocker& locker) { | 660 | Registry& registry) { |
| 660 | auto result_out = std::make_unique<ShaderCharacteristics>(); | 661 | auto result_out = std::make_unique<ShaderCharacteristics>(); |
| 661 | if (settings.depth == CompileDepth::BruteForce) { | 662 | if (settings.depth == CompileDepth::BruteForce) { |
| 662 | result_out->settings.depth = CompileDepth::BruteForce; | 663 | result_out->settings.depth = CompileDepth::BruteForce; |
| 663 | return result_out; | 664 | return result_out; |
| 664 | } | 665 | } |
| 665 | 666 | ||
| 666 | CFGRebuildState state{program_code, start_address, locker}; | 667 | CFGRebuildState state{program_code, start_address, registry}; |
| 667 | // Inspect Code and generate blocks | 668 | // Inspect Code and generate blocks |
| 668 | state.labels.clear(); | 669 | state.labels.clear(); |
| 669 | state.labels.emplace(start_address); | 670 | state.labels.emplace(start_address); |
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 5304998b9..62a3510d8 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "video_core/engines/shader_bytecode.h" | 12 | #include "video_core/engines/shader_bytecode.h" |
| 13 | #include "video_core/shader/ast.h" | 13 | #include "video_core/shader/ast.h" |
| 14 | #include "video_core/shader/compiler_settings.h" | 14 | #include "video_core/shader/compiler_settings.h" |
| 15 | #include "video_core/shader/registry.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | 16 | #include "video_core/shader/shader_ir.h" |
| 16 | 17 | ||
| 17 | namespace VideoCommon::Shader { | 18 | namespace VideoCommon::Shader { |
| @@ -111,6 +112,6 @@ struct ShaderCharacteristics { | |||
| 111 | 112 | ||
| 112 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, | 113 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, |
| 113 | const CompilerSettings& settings, | 114 | const CompilerSettings& settings, |
| 114 | ConstBufferLocker& locker); | 115 | Registry& registry); |
| 115 | 116 | ||
| 116 | } // namespace VideoCommon::Shader | 117 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 6b697ed5d..87ac9ac6c 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -34,13 +34,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | |||
| 34 | return (absolute_offset % SchedPeriod) == 0; | 34 | return (absolute_offset % SchedPeriod) == 0; |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, | 37 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, |
| 38 | const std::list<Sampler>& used_samplers) { | 38 | const std::list<Sampler>& used_samplers) { |
| 39 | if (gpu_driver == nullptr) { | 39 | if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { |
| 40 | LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet"); | ||
| 41 | return; | ||
| 42 | } | ||
| 43 | if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { | ||
| 44 | return; | 40 | return; |
| 45 | } | 41 | } |
| 46 | u32 count{}; | 42 | u32 count{}; |
| @@ -53,17 +49,13 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, | |||
| 53 | bound_offsets.emplace_back(sampler.GetOffset()); | 49 | bound_offsets.emplace_back(sampler.GetOffset()); |
| 54 | } | 50 | } |
| 55 | if (count > 1) { | 51 | if (count > 1) { |
| 56 | gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); | 52 | gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); |
| 57 | } | 53 | } |
| 58 | } | 54 | } |
| 59 | 55 | ||
| 60 | std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, | 56 | std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, |
| 61 | VideoCore::GuestDriverProfile* gpu_driver, | 57 | VideoCore::GuestDriverProfile& gpu_driver, |
| 62 | const std::list<Sampler>& used_samplers) { | 58 | const std::list<Sampler>& used_samplers) { |
| 63 | if (gpu_driver == nullptr) { | ||
| 64 | LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); | ||
| 65 | return std::nullopt; | ||
| 66 | } | ||
| 67 | const u32 base_offset = sampler_to_deduce.GetOffset(); | 59 | const u32 base_offset = sampler_to_deduce.GetOffset(); |
| 68 | u32 max_offset{std::numeric_limits<u32>::max()}; | 60 | u32 max_offset{std::numeric_limits<u32>::max()}; |
| 69 | for (const auto& sampler : used_samplers) { | 61 | for (const auto& sampler : used_samplers) { |
| @@ -77,7 +69,7 @@ std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, | |||
| 77 | if (max_offset == std::numeric_limits<u32>::max()) { | 69 | if (max_offset == std::numeric_limits<u32>::max()) { |
| 78 | return std::nullopt; | 70 | return std::nullopt; |
| 79 | } | 71 | } |
| 80 | return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); | 72 | return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); |
| 81 | } | 73 | } |
| 82 | 74 | ||
| 83 | } // Anonymous namespace | 75 | } // Anonymous namespace |
| @@ -149,7 +141,7 @@ void ShaderIR::Decode() { | |||
| 149 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | 141 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); |
| 150 | 142 | ||
| 151 | decompiled = false; | 143 | decompiled = false; |
| 152 | auto info = ScanFlow(program_code, main_offset, settings, locker); | 144 | auto info = ScanFlow(program_code, main_offset, settings, registry); |
| 153 | auto& shader_info = *info; | 145 | auto& shader_info = *info; |
| 154 | coverage_begin = shader_info.start; | 146 | coverage_begin = shader_info.start; |
| 155 | coverage_end = shader_info.end; | 147 | coverage_end = shader_info.end; |
| @@ -364,7 +356,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | |||
| 364 | 356 | ||
| 365 | void ShaderIR::PostDecode() { | 357 | void ShaderIR::PostDecode() { |
| 366 | // Deduce texture handler size if needed | 358 | // Deduce texture handler size if needed |
| 367 | auto gpu_driver = locker.AccessGuestDriverProfile(); | 359 | auto gpu_driver = registry.AccessGuestDriverProfile(); |
| 368 | DeduceTextureHandlerSize(gpu_driver, used_samplers); | 360 | DeduceTextureHandlerSize(gpu_driver, used_samplers); |
| 369 | // Deduce Indexed Samplers | 361 | // Deduce Indexed Samplers |
| 370 | if (!uses_indexed_samplers) { | 362 | if (!uses_indexed_samplers) { |
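For reference, the indexed-sampler deduction that the cleaned-up helpers above feed into reduces to one formula: find the next sampler offset above the indexed one and divide the byte distance (max_offset - base_offset) * 4 by the guest driver's texture handler size. A minimal standalone sketch of that arithmetic with made-up offsets follows; the 8-byte handler size is only an assumption for the example.

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <limits>
    #include <optional>
    #include <vector>

    // Stand-in for the deduction in TryDeduceSamplerSize: the size of an indexed
    // sampler array is bounded by the nearest sampler that follows it in the
    // const buffer.
    std::optional<std::uint32_t> DeduceSize(std::uint32_t base_offset,
                                            const std::vector<std::uint32_t>& used_offsets,
                                            std::uint32_t texture_handler_size) {
        std::uint32_t max_offset = std::numeric_limits<std::uint32_t>::max();
        for (const std::uint32_t offset : used_offsets) {
            if (offset > base_offset) {
                max_offset = std::min(max_offset, offset);
            }
        }
        if (max_offset == std::numeric_limits<std::uint32_t>::max()) {
            return std::nullopt; // no sampler after this one; size cannot be deduced
        }
        return ((max_offset - base_offset) * 4) / texture_handler_size;
    }

    int main() {
        // Samplers at word offsets 8, 16 and 48; assumed handler size of 8 bytes.
        const auto size = DeduceSize(16, {8, 16, 48}, 8);
        std::printf("deduced array size: %u\n", size.value_or(0)); // (48 - 16) * 4 / 8 = 16
    }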
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp index e02bcd097..8e3b46e8e 100644 --- a/src/video_core/shader/decode/bfe.cpp +++ b/src/video_core/shader/decode/bfe.cpp | |||
| @@ -17,33 +17,60 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { | |||
| 17 | const Instruction instr = {program_code[pc]}; | 17 | const Instruction instr = {program_code[pc]}; |
| 18 | const auto opcode = OpCode::Decode(instr); | 18 | const auto opcode = OpCode::Decode(instr); |
| 19 | 19 | ||
| 20 | UNIMPLEMENTED_IF(instr.bfe.negate_b); | ||
| 21 | |||
| 22 | Node op_a = GetRegister(instr.gpr8); | 20 | Node op_a = GetRegister(instr.gpr8); |
| 23 | op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false); | 21 | Node op_b = [&] { |
| 24 | 22 | switch (opcode->get().GetId()) { | |
| 25 | switch (opcode->get().GetId()) { | 23 | case OpCode::Id::BFE_R: |
| 26 | case OpCode::Id::BFE_IMM: { | 24 | return GetRegister(instr.gpr20); |
| 27 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | 25 | case OpCode::Id::BFE_C: |
| 28 | "Condition codes generation in BFE is not implemented"); | 26 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); |
| 27 | case OpCode::Id::BFE_IMM: | ||
| 28 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 29 | default: | ||
| 30 | UNREACHABLE(); | ||
| 31 | return Immediate(0); | ||
| 32 | } | ||
| 33 | }(); | ||
| 29 | 34 | ||
| 30 | const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue())); | 35 | UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE are not implemented"); |
| 31 | const Node outer_shift_imm = | ||
| 32 | Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position)); | ||
| 33 | 36 | ||
| 34 | const Node inner_shift = | 37 | const bool is_signed = instr.bfe.is_signed; |
| 35 | Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm); | ||
| 36 | const Node outer_shift = | ||
| 37 | Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm); | ||
| 38 | 38 | ||
| 39 | SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc); | 39 | // Uses the reverse parallel method described in |
| 40 | SetRegister(bb, instr.gpr0, outer_shift); | 40 | // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel |
| 41 | break; | 41 | // Note for later: a faster method may be possible. |
| 42 | } | 42 | if (instr.bfe.brev) { |
| 43 | default: | 43 | const auto swap = [&](u32 s, u32 mask) { |
| 44 | UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName()); | 44 | Node v1 = |
| 45 | SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s)); | ||
| 46 | if (mask != 0) { | ||
| 47 | v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1), | ||
| 48 | Immediate(mask)); | ||
| 49 | } | ||
| 50 | Node v2 = op_a; | ||
| 51 | if (mask != 0) { | ||
| 52 | v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2), | ||
| 53 | Immediate(mask)); | ||
| 54 | } | ||
| 55 | v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2), | ||
| 56 | Immediate(s)); | ||
| 57 | return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1), | ||
| 58 | std::move(v2)); | ||
| 59 | }; | ||
| 60 | op_a = swap(1, 0x55555555U); | ||
| 61 | op_a = swap(2, 0x33333333U); | ||
| 62 | op_a = swap(4, 0x0F0F0F0FU); | ||
| 63 | op_a = swap(8, 0x00FF00FFU); | ||
| 64 | op_a = swap(16, 0); | ||
| 45 | } | 65 | } |
| 46 | 66 | ||
| 67 | const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, | ||
| 68 | Immediate(0), Immediate(8)); | ||
| 69 | const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, | ||
| 70 | Immediate(8), Immediate(8)); | ||
| 71 | auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits); | ||
| 72 | SetRegister(bb, instr.gpr0, std::move(result)); | ||
| 73 | |||
| 47 | return pc; | 74 | return pc; |
| 48 | } | 75 | } |
| 49 | 76 | ||
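The new BRV path above builds the classic reverse-parallel bit reversal (from the linked bit-hacks page) out of IR nodes, then feeds the result into IBitfieldExtract with the extract offset taken from bits [0,8) and the field width from bits [8,16) of the second operand. As a sanity check, the same swap sequence on a plain 32-bit value looks like this (a standalone sketch, not code from the repository):

    #include <cstdint>
    #include <cstdio>

    // Reverse the bits of a 32-bit value with the same mask/shift sequence the
    // decoder emits: swap adjacent bits, then pairs, nibbles, bytes, and finally
    // the two half-words.
    std::uint32_t ReverseBits(std::uint32_t v) {
        v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1);
        v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2);
        v = ((v >> 4) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4);
        v = ((v >> 8) & 0x00FF00FFu) | ((v & 0x00FF00FFu) << 8);
        v = (v >> 16) | (v << 16);
        return v;
    }

    int main() {
        std::printf("%08X\n", ReverseBits(0x00000001u)); // prints 80000000
    }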
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index bee7d8cad..48350e042 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "common/logging/log.h" | 12 | #include "common/logging/log.h" |
| 13 | #include "video_core/engines/shader_bytecode.h" | 13 | #include "video_core/engines/shader_bytecode.h" |
| 14 | #include "video_core/shader/node_helper.h" | 14 | #include "video_core/shader/node_helper.h" |
| 15 | #include "video_core/shader/registry.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | 16 | #include "video_core/shader/shader_ir.h" |
| 16 | 17 | ||
| 17 | namespace VideoCommon::Shader { | 18 | namespace VideoCommon::Shader { |
| @@ -359,8 +360,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(std::optional<SamplerInfo> sample | |||
| 359 | if (sampler_info) { | 360 | if (sampler_info) { |
| 360 | return *sampler_info; | 361 | return *sampler_info; |
| 361 | } | 362 | } |
| 362 | const auto sampler = | 363 | const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset) |
| 363 | buffer ? locker.ObtainBindlessSampler(*buffer, offset) : locker.ObtainBoundSampler(offset); | 364 | : registry.ObtainBoundSampler(offset); |
| 364 | if (!sampler) { | 365 | if (!sampler) { |
| 365 | LOG_WARNING(HW_GPU, "Unknown sampler info"); | 366 | LOG_WARNING(HW_GPU, "Unknown sampler info"); |
| 366 | return SamplerInfo{TextureType::Texture2D, false, false, false}; | 367 | return SamplerInfo{TextureType::Texture2D, false, false, false}; |
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp index b3dcd291c..76c56abb5 100644 --- a/src/video_core/shader/node_helper.cpp +++ b/src/video_core/shader/node_helper.cpp | |||
| @@ -68,6 +68,8 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) | |||
| 68 | return OperationCode::UBitwiseXor; | 68 | return OperationCode::UBitwiseXor; |
| 69 | case OperationCode::IBitwiseNot: | 69 | case OperationCode::IBitwiseNot: |
| 70 | return OperationCode::UBitwiseNot; | 70 | return OperationCode::UBitwiseNot; |
| 71 | case OperationCode::IBitfieldExtract: | ||
| 72 | return OperationCode::UBitfieldExtract; | ||
| 71 | case OperationCode::IBitfieldInsert: | 73 | case OperationCode::IBitfieldInsert: |
| 72 | return OperationCode::UBitfieldInsert; | 74 | return OperationCode::UBitfieldInsert; |
| 73 | case OperationCode::IBitCount: | 75 | case OperationCode::IBitCount: |
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp new file mode 100644 index 000000000..af70b3f35 --- /dev/null +++ b/src/video_core/shader/registry.cpp | |||
| @@ -0,0 +1,161 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <tuple> | ||
| 7 | |||
| 8 | #include "common/assert.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/engines/kepler_compute.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/engines/shader_type.h" | ||
| 13 | #include "video_core/shader/registry.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | using Tegra::Engines::ConstBufferEngineInterface; | ||
| 18 | using Tegra::Engines::SamplerDescriptor; | ||
| 19 | using Tegra::Engines::ShaderType; | ||
| 20 | |||
| 21 | namespace { | ||
| 22 | |||
| 23 | GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { | ||
| 24 | if (shader_stage == ShaderType::Compute) { | ||
| 25 | return {}; | ||
| 26 | } | ||
| 27 | auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine); | ||
| 28 | |||
| 29 | GraphicsInfo info; | ||
| 30 | info.tfb_layouts = graphics.regs.tfb_layouts; | ||
| 31 | info.tfb_varying_locs = graphics.regs.tfb_varying_locs; | ||
| 32 | info.primitive_topology = graphics.regs.draw.topology; | ||
| 33 | info.tessellation_primitive = graphics.regs.tess_mode.prim; | ||
| 34 | info.tessellation_spacing = graphics.regs.tess_mode.spacing; | ||
| 35 | info.tfb_enabled = graphics.regs.tfb_enabled; | ||
| 36 | info.tessellation_clockwise = graphics.regs.tess_mode.cw; | ||
| 37 | return info; | ||
| 38 | } | ||
| 39 | |||
| 40 | ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { | ||
| 41 | if (shader_stage != ShaderType::Compute) { | ||
| 42 | return {}; | ||
| 43 | } | ||
| 44 | auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine); | ||
| 45 | const auto& launch = compute.launch_description; | ||
| 46 | |||
| 47 | ComputeInfo info; | ||
| 48 | info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}; | ||
| 49 | info.local_memory_size_in_words = launch.local_pos_alloc; | ||
| 50 | info.shared_memory_size_in_words = launch.shared_alloc; | ||
| 51 | return info; | ||
| 52 | } | ||
| 53 | |||
| 54 | } // Anonymous namespace | ||
| 55 | |||
| 56 | Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info) | ||
| 57 | : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, | ||
| 58 | bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} | ||
| 59 | |||
| 60 | Registry::Registry(Tegra::Engines::ShaderType shader_stage, | ||
| 61 | Tegra::Engines::ConstBufferEngineInterface& engine) | ||
| 62 | : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()}, | ||
| 63 | graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo( | ||
| 64 | shader_stage, engine)} {} | ||
| 65 | |||
| 66 | Registry::~Registry() = default; | ||
| 67 | |||
| 68 | std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) { | ||
| 69 | const std::pair<u32, u32> key = {buffer, offset}; | ||
| 70 | const auto iter = keys.find(key); | ||
| 71 | if (iter != keys.end()) { | ||
| 72 | return iter->second; | ||
| 73 | } | ||
| 74 | if (!engine) { | ||
| 75 | return std::nullopt; | ||
| 76 | } | ||
| 77 | const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); | ||
| 78 | keys.emplace(key, value); | ||
| 79 | return value; | ||
| 80 | } | ||
| 81 | |||
| 82 | std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) { | ||
| 83 | const u32 key = offset; | ||
| 84 | const auto iter = bound_samplers.find(key); | ||
| 85 | if (iter != bound_samplers.end()) { | ||
| 86 | return iter->second; | ||
| 87 | } | ||
| 88 | if (!engine) { | ||
| 89 | return std::nullopt; | ||
| 90 | } | ||
| 91 | const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); | ||
| 92 | bound_samplers.emplace(key, value); | ||
| 93 | return value; | ||
| 94 | } | ||
| 95 | |||
| 96 | std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, | ||
| 97 | u32 offset) { | ||
| 98 | const std::pair key = {buffer, offset}; | ||
| 99 | const auto iter = bindless_samplers.find(key); | ||
| 100 | if (iter != bindless_samplers.end()) { | ||
| 101 | return iter->second; | ||
| 102 | } | ||
| 103 | if (!engine) { | ||
| 104 | return std::nullopt; | ||
| 105 | } | ||
| 106 | const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); | ||
| 107 | bindless_samplers.emplace(key, value); | ||
| 108 | return value; | ||
| 109 | } | ||
| 110 | |||
| 111 | void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { | ||
| 112 | keys.insert_or_assign({buffer, offset}, value); | ||
| 113 | } | ||
| 114 | |||
| 115 | void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { | ||
| 116 | bound_samplers.insert_or_assign(offset, sampler); | ||
| 117 | } | ||
| 118 | |||
| 119 | void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { | ||
| 120 | bindless_samplers.insert_or_assign({buffer, offset}, sampler); | ||
| 121 | } | ||
| 122 | |||
| 123 | bool Registry::IsConsistent() const { | ||
| 124 | if (!engine) { | ||
| 125 | return true; | ||
| 126 | } | ||
| 127 | return std::all_of(keys.begin(), keys.end(), | ||
| 128 | [this](const auto& pair) { | ||
| 129 | const auto [cbuf, offset] = pair.first; | ||
| 130 | const auto value = pair.second; | ||
| 131 | return value == engine->AccessConstBuffer32(stage, cbuf, offset); | ||
| 132 | }) && | ||
| 133 | std::all_of(bound_samplers.begin(), bound_samplers.end(), | ||
| 134 | [this](const auto& sampler) { | ||
| 135 | const auto [key, value] = sampler; | ||
| 136 | return value == engine->AccessBoundSampler(stage, key); | ||
| 137 | }) && | ||
| 138 | std::all_of(bindless_samplers.begin(), bindless_samplers.end(), | ||
| 139 | [this](const auto& sampler) { | ||
| 140 | const auto [cbuf, offset] = sampler.first; | ||
| 141 | const auto value = sampler.second; | ||
| 142 | return value == engine->AccessBindlessSampler(stage, cbuf, offset); | ||
| 143 | }); | ||
| 144 | } | ||
| 145 | |||
| 146 | bool Registry::HasEqualKeys(const Registry& rhs) const { | ||
| 147 | return std::tie(keys, bound_samplers, bindless_samplers) == | ||
| 148 | std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); | ||
| 149 | } | ||
| 150 | |||
| 151 | const GraphicsInfo& Registry::GetGraphicsInfo() const { | ||
| 152 | ASSERT(stage != Tegra::Engines::ShaderType::Compute); | ||
| 153 | return graphics_info; | ||
| 154 | } | ||
| 155 | |||
| 156 | const ComputeInfo& Registry::GetComputeInfo() const { | ||
| 157 | ASSERT(stage == Tegra::Engines::ShaderType::Compute); | ||
| 158 | return compute_info; | ||
| 159 | } | ||
| 160 | |||
| 161 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h new file mode 100644 index 000000000..0c80d35fd --- /dev/null +++ b/src/video_core/shader/registry.h | |||
| @@ -0,0 +1,137 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <optional> | ||
| 9 | #include <type_traits> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <utility> | ||
| 12 | |||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "common/hash.h" | ||
| 15 | #include "video_core/engines/const_buffer_engine_interface.h" | ||
| 16 | #include "video_core/engines/maxwell_3d.h" | ||
| 17 | #include "video_core/engines/shader_type.h" | ||
| 18 | #include "video_core/guest_driver.h" | ||
| 19 | |||
| 20 | namespace VideoCommon::Shader { | ||
| 21 | |||
| 22 | using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; | ||
| 23 | using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; | ||
| 24 | using BindlessSamplerMap = | ||
| 25 | std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; | ||
| 26 | |||
| 27 | struct GraphicsInfo { | ||
| 28 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 29 | |||
| 30 | std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers> | ||
| 31 | tfb_layouts{}; | ||
| 32 | std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{}; | ||
| 33 | Maxwell::PrimitiveTopology primitive_topology{}; | ||
| 34 | Maxwell::TessellationPrimitive tessellation_primitive{}; | ||
| 35 | Maxwell::TessellationSpacing tessellation_spacing{}; | ||
| 36 | bool tfb_enabled = false; | ||
| 37 | bool tessellation_clockwise = false; | ||
| 38 | }; | ||
| 39 | static_assert(std::is_trivially_copyable_v<GraphicsInfo> && | ||
| 40 | std::is_standard_layout_v<GraphicsInfo>); | ||
| 41 | |||
| 42 | struct ComputeInfo { | ||
| 43 | std::array<u32, 3> workgroup_size{}; | ||
| 44 | u32 shared_memory_size_in_words = 0; | ||
| 45 | u32 local_memory_size_in_words = 0; | ||
| 46 | }; | ||
| 47 | static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>); | ||
| 48 | |||
| 49 | struct SerializedRegistryInfo { | ||
| 50 | VideoCore::GuestDriverProfile guest_driver_profile; | ||
| 51 | u32 bound_buffer = 0; | ||
| 52 | GraphicsInfo graphics; | ||
| 53 | ComputeInfo compute; | ||
| 54 | }; | ||
| 55 | |||
| 56 | /** | ||
| 57 | * The Registry is a class used to interface the 3D and compute engines with the shader compiler. | ||
| 58 | * With it, the shader can obtain required data from GPU state and store it for disk shader | ||
| 59 | * compilation. | ||
| 60 | */ | ||
| 61 | class Registry { | ||
| 62 | public: | ||
| 63 | explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); | ||
| 64 | |||
| 65 | explicit Registry(Tegra::Engines::ShaderType shader_stage, | ||
| 66 | Tegra::Engines::ConstBufferEngineInterface& engine); | ||
| 67 | |||
| 68 | ~Registry(); | ||
| 69 | |||
| 70 | /// Retrieves a key from the registry. If it is already registered, the stored value is | ||
| 71 | /// returned; otherwise it is obtained from Maxwell3D and registered. | ||
| 72 | std::optional<u32> ObtainKey(u32 buffer, u32 offset); | ||
| 73 | |||
| 74 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); | ||
| 75 | |||
| 76 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); | ||
| 77 | |||
| 78 | /// Inserts a key. | ||
| 79 | void InsertKey(u32 buffer, u32 offset, u32 value); | ||
| 80 | |||
| 81 | /// Inserts a bound sampler key. | ||
| 82 | void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 83 | |||
| 84 | /// Inserts a bindless sampler key. | ||
| 85 | void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 86 | |||
| 87 | /// Checks keys and samplers against engine's current const buffers. | ||
| 88 | /// Returns true if they are the same value, false otherwise. | ||
| 89 | bool IsConsistent() const; | ||
| 90 | |||
| 91 | /// Returns true if the keys are equal to the other ones in the registry. | ||
| 92 | bool HasEqualKeys(const Registry& rhs) const; | ||
| 93 | |||
| 94 | /// Returns graphics information from this shader | ||
| 95 | const GraphicsInfo& GetGraphicsInfo() const; | ||
| 96 | |||
| 97 | /// Returns compute information from this shader | ||
| 98 | const ComputeInfo& GetComputeInfo() const; | ||
| 99 | |||
| 100 | /// Returns the const buffer keys in the database. | ||
| 101 | const KeyMap& GetKeys() const { | ||
| 102 | return keys; | ||
| 103 | } | ||
| 104 | |||
| 105 | /// Gets samplers database. | ||
| 106 | const BoundSamplerMap& GetBoundSamplers() const { | ||
| 107 | return bound_samplers; | ||
| 108 | } | ||
| 109 | |||
| 110 | /// Gets bindless samplers database. | ||
| 111 | const BindlessSamplerMap& GetBindlessSamplers() const { | ||
| 112 | return bindless_samplers; | ||
| 113 | } | ||
| 114 | |||
| 115 | /// Gets bound buffer used on this shader | ||
| 116 | u32 GetBoundBuffer() const { | ||
| 117 | return bound_buffer; | ||
| 118 | } | ||
| 119 | |||
| 120 | /// Obtains access to the guest driver's profile. | ||
| 121 | VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { | ||
| 122 | return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; | ||
| 123 | } | ||
| 124 | |||
| 125 | private: | ||
| 126 | const Tegra::Engines::ShaderType stage; | ||
| 127 | VideoCore::GuestDriverProfile stored_guest_driver_profile; | ||
| 128 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; | ||
| 129 | KeyMap keys; | ||
| 130 | BoundSamplerMap bound_samplers; | ||
| 131 | BindlessSamplerMap bindless_samplers; | ||
| 132 | u32 bound_buffer; | ||
| 133 | GraphicsInfo graphics_info; | ||
| 134 | ComputeInfo compute_info; | ||
| 135 | }; | ||
| 136 | |||
| 137 | } // namespace VideoCommon::Shader | ||
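The two constructors above are what allow the disk shader cache to replay compilations without a live engine: at runtime the registry is backed by the engine and lazily records const buffer keys and sampler descriptors, while a cache load rebuilds it from a SerializedRegistryInfo, in which case only previously recorded keys resolve. The miniature model below illustrates just that caching pattern; FakeEngine and MiniRegistry are invented names, not part of this codebase.

    #include <cstdint>
    #include <cstdio>
    #include <optional>
    #include <unordered_map>

    // Miniature model of Registry::ObtainKey's caching pattern (illustrative only):
    // a value is served from the cache when present, fetched from the engine when
    // one is attached, and unresolvable otherwise (e.g. during a disk-cache replay
    // when the key was never recorded).
    struct FakeEngine {
        std::uint32_t AccessConstBuffer32(std::uint32_t buffer, std::uint32_t offset) const {
            return buffer * 1000 + offset; // stand-in for real GPU state
        }
    };

    class MiniRegistry {
    public:
        explicit MiniRegistry(const FakeEngine* engine_) : engine{engine_} {}

        std::optional<std::uint32_t> ObtainKey(std::uint32_t buffer, std::uint32_t offset) {
            const std::uint64_t key = (static_cast<std::uint64_t>(buffer) << 32) | offset;
            if (const auto it = keys.find(key); it != keys.end()) {
                return it->second;
            }
            if (!engine) {
                return std::nullopt;
            }
            const std::uint32_t value = engine->AccessConstBuffer32(buffer, offset);
            keys.emplace(key, value);
            return value;
        }

    private:
        const FakeEngine* engine;
        std::unordered_map<std::uint64_t, std::uint32_t> keys;
    };

    int main() {
        FakeEngine engine;
        MiniRegistry runtime{&engine};   // runtime compilation: backed by the engine
        MiniRegistry replay{nullptr};    // disk-cache replay: only recorded keys resolve

        std::printf("runtime: %u\n", *runtime.ObtainKey(3, 16));
        std::printf("replay resolvable: %s\n", replay.ObtainKey(3, 16) ? "yes" : "no");
    }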
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 3a5d280a9..425927777 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "common/logging/log.h" | 11 | #include "common/logging/log.h" |
| 12 | #include "video_core/engines/shader_bytecode.h" | 12 | #include "video_core/engines/shader_bytecode.h" |
| 13 | #include "video_core/shader/node_helper.h" | 13 | #include "video_core/shader/node_helper.h" |
| 14 | #include "video_core/shader/registry.h" | ||
| 14 | #include "video_core/shader/shader_ir.h" | 15 | #include "video_core/shader/shader_ir.h" |
| 15 | 16 | ||
| 16 | namespace VideoCommon::Shader { | 17 | namespace VideoCommon::Shader { |
| @@ -24,8 +25,8 @@ using Tegra::Shader::PredOperation; | |||
| 24 | using Tegra::Shader::Register; | 25 | using Tegra::Shader::Register; |
| 25 | 26 | ||
| 26 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, | 27 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, |
| 27 | ConstBufferLocker& locker) | 28 | Registry& registry) |
| 28 | : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { | 29 | : program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} { |
| 29 | Decode(); | 30 | Decode(); |
| 30 | PostDecode(); | 31 | PostDecode(); |
| 31 | } | 32 | } |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index b0851c3be..dde036b40 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -18,8 +18,8 @@ | |||
| 18 | #include "video_core/engines/shader_header.h" | 18 | #include "video_core/engines/shader_header.h" |
| 19 | #include "video_core/shader/ast.h" | 19 | #include "video_core/shader/ast.h" |
| 20 | #include "video_core/shader/compiler_settings.h" | 20 | #include "video_core/shader/compiler_settings.h" |
| 21 | #include "video_core/shader/const_buffer_locker.h" | ||
| 22 | #include "video_core/shader/node.h" | 21 | #include "video_core/shader/node.h" |
| 22 | #include "video_core/shader/registry.h" | ||
| 23 | 23 | ||
| 24 | namespace VideoCommon::Shader { | 24 | namespace VideoCommon::Shader { |
| 25 | 25 | ||
| @@ -69,7 +69,7 @@ struct GlobalMemoryUsage { | |||
| 69 | class ShaderIR final { | 69 | class ShaderIR final { |
| 70 | public: | 70 | public: |
| 71 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, | 71 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, |
| 72 | ConstBufferLocker& locker); | 72 | Registry& registry); |
| 73 | ~ShaderIR(); | 73 | ~ShaderIR(); |
| 74 | 74 | ||
| 75 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { | 75 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { |
| @@ -414,7 +414,7 @@ private: | |||
| 414 | const ProgramCode& program_code; | 414 | const ProgramCode& program_code; |
| 415 | const u32 main_offset; | 415 | const u32 main_offset; |
| 416 | const CompilerSettings settings; | 416 | const CompilerSettings settings; |
| 417 | ConstBufferLocker& locker; | 417 | Registry& registry; |
| 418 | 418 | ||
| 419 | bool decompiled{}; | 419 | bool decompiled{}; |
| 420 | bool disable_flow_stack{}; | 420 | bool disable_flow_stack{}; |
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 15e22b9fa..10739b37d 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp | |||
| @@ -81,26 +81,20 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons | |||
| 81 | MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); | 81 | MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); |
| 82 | return {tracked, track}; | 82 | return {tracked, track}; |
| 83 | } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { | 83 | } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { |
| 84 | auto bound_buffer = locker.ObtainBoundBuffer(); | 84 | const u32 bound_buffer = registry.GetBoundBuffer(); |
| 85 | if (!bound_buffer) { | 85 | if (bound_buffer != cbuf->GetIndex()) { |
| 86 | return {}; | 86 | return {}; |
| 87 | } | 87 | } |
| 88 | if (*bound_buffer != cbuf->GetIndex()) { | 88 | const auto pair = DecoupleIndirectRead(*operation); |
| 89 | return {}; | ||
| 90 | } | ||
| 91 | auto pair = DecoupleIndirectRead(*operation); | ||
| 92 | if (!pair) { | 89 | if (!pair) { |
| 93 | return {}; | 90 | return {}; |
| 94 | } | 91 | } |
| 95 | auto [gpr, base_offset] = *pair; | 92 | auto [gpr, base_offset] = *pair; |
| 96 | const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); | 93 | const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); |
| 97 | auto gpu_driver = locker.AccessGuestDriverProfile(); | 94 | const auto& gpu_driver = registry.AccessGuestDriverProfile(); |
| 98 | if (gpu_driver == nullptr) { | ||
| 99 | return {}; | ||
| 100 | } | ||
| 101 | const u32 bindless_cv = NewCustomVariable(); | 95 | const u32 bindless_cv = NewCustomVariable(); |
| 102 | const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, | 96 | const Node op = |
| 103 | Immediate(gpu_driver->GetTextureHandlerSize())); | 97 | Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize())); |
| 104 | 98 | ||
| 105 | const Node cv_node = GetCustomVariable(bindless_cv); | 99 | const Node cv_node = GetCustomVariable(bindless_cv); |
| 106 | Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); | 100 | Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); |
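
The track.cpp hunk above drops two null checks because Registry::GetBoundBuffer() now returns the bound buffer index by value and AccessGuestDriverProfile() returns a reference; what ends up in the new custom variable is the bindless handle register divided by the guest driver's texture handler size. A self-contained sketch of that arithmetic (plain integers here; the real code emits IR nodes via Operation/Immediate):

    #include <cstdint>

    // Index math behind the custom variable assigned in TrackBindlessSampler.
    constexpr std::uint32_t BindlessIndex(std::uint32_t handle_register,
                                          std::uint32_t texture_handler_size) {
        return handle_register / texture_handler_size;
    }

    static_assert(BindlessIndex(0x20, 0x8) == 4);
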
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp new file mode 100644 index 000000000..22a933761 --- /dev/null +++ b/src/video_core/shader/transform_feedback.cpp | |||
| @@ -0,0 +1,115 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <unordered_map> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/shader/registry.h" | ||
| 13 | #include "video_core/shader/transform_feedback.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | namespace { | ||
| 18 | |||
| 19 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 20 | |||
| 21 | // TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20 | ||
| 22 | |||
| 23 | /// Attribute offsets that describe a vector | ||
| 24 | constexpr std::array VECTORS = { | ||
| 25 | 28, // gl_Position | ||
| 26 | 32, // Generic 0 | ||
| 27 | 36, // Generic 1 | ||
| 28 | 40, // Generic 2 | ||
| 29 | 44, // Generic 3 | ||
| 30 | 48, // Generic 4 | ||
| 31 | 52, // Generic 5 | ||
| 32 | 56, // Generic 6 | ||
| 33 | 60, // Generic 7 | ||
| 34 | 64, // Generic 8 | ||
| 35 | 68, // Generic 9 | ||
| 36 | 72, // Generic 10 | ||
| 37 | 76, // Generic 11 | ||
| 38 | 80, // Generic 12 | ||
| 39 | 84, // Generic 13 | ||
| 40 | 88, // Generic 14 | ||
| 41 | 92, // Generic 15 | ||
| 42 | 96, // Generic 16 | ||
| 43 | 100, // Generic 17 | ||
| 44 | 104, // Generic 18 | ||
| 45 | 108, // Generic 19 | ||
| 46 | 112, // Generic 20 | ||
| 47 | 116, // Generic 21 | ||
| 48 | 120, // Generic 22 | ||
| 49 | 124, // Generic 23 | ||
| 50 | 128, // Generic 24 | ||
| 51 | 132, // Generic 25 | ||
| 52 | 136, // Generic 26 | ||
| 53 | 140, // Generic 27 | ||
| 54 | 144, // Generic 28 | ||
| 55 | 148, // Generic 29 | ||
| 56 | 152, // Generic 30 | ||
| 57 | 156, // Generic 31 | ||
| 58 | 160, // gl_FrontColor | ||
| 59 | 164, // gl_FrontSecondaryColor | ||
| 60 | 160, // gl_BackColor | ||
| 61 | 164, // gl_BackSecondaryColor | ||
| 62 | 192, // gl_TexCoord[0] | ||
| 63 | 196, // gl_TexCoord[1] | ||
| 64 | 200, // gl_TexCoord[2] | ||
| 65 | 204, // gl_TexCoord[3] | ||
| 66 | 208, // gl_TexCoord[4] | ||
| 67 | 212, // gl_TexCoord[5] | ||
| 68 | 216, // gl_TexCoord[6] | ||
| 69 | 220, // gl_TexCoord[7] | ||
| 70 | }; | ||
| 71 | } // namespace | ||
| 72 | |||
| 73 | std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) { | ||
| 74 | |||
| 75 | std::unordered_map<u8, VaryingTFB> tfb; | ||
| 76 | |||
| 77 | for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { | ||
| 78 | const auto& locations = info.tfb_varying_locs[buffer]; | ||
| 79 | const auto& layout = info.tfb_layouts[buffer]; | ||
| 80 | const std::size_t varying_count = layout.varying_count; | ||
| 81 | |||
| 82 | std::size_t highest = 0; | ||
| 83 | |||
| 84 | for (std::size_t offset = 0; offset < varying_count; ++offset) { | ||
| 85 | const std::size_t base_offset = offset; | ||
| 86 | const u8 location = locations[offset]; | ||
| 87 | |||
| 88 | VaryingTFB varying; | ||
| 89 | varying.buffer = layout.stream; | ||
| 90 | varying.stride = layout.stride; | ||
| 91 | varying.offset = offset * sizeof(u32); | ||
| 92 | varying.components = 1; | ||
| 93 | |||
| 94 | if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) { | ||
| 95 | UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); | ||
| 96 | |||
| 97 | const u8 base_index = location / 4; | ||
| 98 | while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { | ||
| 99 | ++offset; | ||
| 100 | ++varying.components; | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second; | ||
| 105 | UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored"); | ||
| 106 | |||
| 107 | highest = std::max(highest, (base_offset + varying.components) * sizeof(u32)); | ||
| 108 | } | ||
| 109 | |||
| 110 | UNIMPLEMENTED_IF(highest != layout.stride); | ||
| 111 | } | ||
| 112 | return tfb; | ||
| 113 | } | ||
| 114 | |||
| 115 | } // namespace VideoCommon::Shader | ||
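
BuildTransformFeedback above folds consecutive locations that belong to the same vec4 attribute into a single VaryingTFB with components > 1, keyed by the first location. A sketch of how a renderer backend might walk the result; GraphicsInfo comes from registry.h, and the function and format string below are illustrative only:

    #include <cstdio>

    #include "video_core/shader/transform_feedback.h"

    void DumpTransformFeedback(const VideoCommon::Shader::GraphicsInfo& info) {
        const auto tfb = VideoCommon::Shader::BuildTransformFeedback(info);
        for (const auto& [location, varying] : tfb) {
            std::printf("location=%u buffer=%zu offset=%zu components=%zu stride=%zu\n",
                        static_cast<unsigned>(location), varying.buffer, varying.offset,
                        varying.components, varying.stride);
        }
    }
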
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h new file mode 100644 index 000000000..77d05f64c --- /dev/null +++ b/src/video_core/shader/transform_feedback.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <unordered_map> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/shader/registry.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | struct VaryingTFB { | ||
| 15 | std::size_t buffer; | ||
| 16 | std::size_t stride; | ||
| 17 | std::size_t offset; | ||
| 18 | std::size_t components; | ||
| 19 | }; | ||
| 20 | |||
| 21 | std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info); | ||
| 22 | |||
| 23 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 9707c353d..cc7181229 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -111,6 +111,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) | |||
| 111 | return PixelFormat::RGBA16F; | 111 | return PixelFormat::RGBA16F; |
| 112 | case Tegra::RenderTargetFormat::RGBA16_UNORM: | 112 | case Tegra::RenderTargetFormat::RGBA16_UNORM: |
| 113 | return PixelFormat::RGBA16U; | 113 | return PixelFormat::RGBA16U; |
| 114 | case Tegra::RenderTargetFormat::RGBA16_SNORM: | ||
| 115 | return PixelFormat::RGBA16S; | ||
| 114 | case Tegra::RenderTargetFormat::RGBA16_UINT: | 116 | case Tegra::RenderTargetFormat::RGBA16_UINT: |
| 115 | return PixelFormat::RGBA16UI; | 117 | return PixelFormat::RGBA16UI; |
| 116 | case Tegra::RenderTargetFormat::RGBA32_FLOAT: | 118 | case Tegra::RenderTargetFormat::RGBA32_FLOAT: |
diff --git a/src/video_core/surface.h b/src/video_core/surface.h index d88109e5a..ae8817465 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h | |||
| @@ -25,82 +25,83 @@ enum class PixelFormat { | |||
| 25 | R8UI = 7, | 25 | R8UI = 7, |
| 26 | RGBA16F = 8, | 26 | RGBA16F = 8, |
| 27 | RGBA16U = 9, | 27 | RGBA16U = 9, |
| 28 | RGBA16UI = 10, | 28 | RGBA16S = 10, |
| 29 | R11FG11FB10F = 11, | 29 | RGBA16UI = 11, |
| 30 | RGBA32UI = 12, | 30 | R11FG11FB10F = 12, |
| 31 | DXT1 = 13, | 31 | RGBA32UI = 13, |
| 32 | DXT23 = 14, | 32 | DXT1 = 14, |
| 33 | DXT45 = 15, | 33 | DXT23 = 15, |
| 34 | DXN1 = 16, // This is also known as BC4 | 34 | DXT45 = 16, |
| 35 | DXN2UNORM = 17, | 35 | DXN1 = 17, // This is also known as BC4 |
| 36 | DXN2SNORM = 18, | 36 | DXN2UNORM = 18, |
| 37 | BC7U = 19, | 37 | DXN2SNORM = 19, |
| 38 | BC6H_UF16 = 20, | 38 | BC7U = 20, |
| 39 | BC6H_SF16 = 21, | 39 | BC6H_UF16 = 21, |
| 40 | ASTC_2D_4X4 = 22, | 40 | BC6H_SF16 = 22, |
| 41 | BGRA8 = 23, | 41 | ASTC_2D_4X4 = 23, |
| 42 | RGBA32F = 24, | 42 | BGRA8 = 24, |
| 43 | RG32F = 25, | 43 | RGBA32F = 25, |
| 44 | R32F = 26, | 44 | RG32F = 26, |
| 45 | R16F = 27, | 45 | R32F = 27, |
| 46 | R16U = 28, | 46 | R16F = 28, |
| 47 | R16S = 29, | 47 | R16U = 29, |
| 48 | R16UI = 30, | 48 | R16S = 30, |
| 49 | R16I = 31, | 49 | R16UI = 31, |
| 50 | RG16 = 32, | 50 | R16I = 32, |
| 51 | RG16F = 33, | 51 | RG16 = 33, |
| 52 | RG16UI = 34, | 52 | RG16F = 34, |
| 53 | RG16I = 35, | 53 | RG16UI = 35, |
| 54 | RG16S = 36, | 54 | RG16I = 36, |
| 55 | RGB32F = 37, | 55 | RG16S = 37, |
| 56 | RGBA8_SRGB = 38, | 56 | RGB32F = 38, |
| 57 | RG8U = 39, | 57 | RGBA8_SRGB = 39, |
| 58 | RG8S = 40, | 58 | RG8U = 40, |
| 59 | RG32UI = 41, | 59 | RG8S = 41, |
| 60 | RGBX16F = 42, | 60 | RG32UI = 42, |
| 61 | R32UI = 43, | 61 | RGBX16F = 43, |
| 62 | R32I = 44, | 62 | R32UI = 44, |
| 63 | ASTC_2D_8X8 = 45, | 63 | R32I = 45, |
| 64 | ASTC_2D_8X5 = 46, | 64 | ASTC_2D_8X8 = 46, |
| 65 | ASTC_2D_5X4 = 47, | 65 | ASTC_2D_8X5 = 47, |
| 66 | BGRA8_SRGB = 48, | 66 | ASTC_2D_5X4 = 48, |
| 67 | DXT1_SRGB = 49, | 67 | BGRA8_SRGB = 49, |
| 68 | DXT23_SRGB = 50, | 68 | DXT1_SRGB = 50, |
| 69 | DXT45_SRGB = 51, | 69 | DXT23_SRGB = 51, |
| 70 | BC7U_SRGB = 52, | 70 | DXT45_SRGB = 52, |
| 71 | R4G4B4A4U = 53, | 71 | BC7U_SRGB = 53, |
| 72 | ASTC_2D_4X4_SRGB = 54, | 72 | R4G4B4A4U = 54, |
| 73 | ASTC_2D_8X8_SRGB = 55, | 73 | ASTC_2D_4X4_SRGB = 55, |
| 74 | ASTC_2D_8X5_SRGB = 56, | 74 | ASTC_2D_8X8_SRGB = 56, |
| 75 | ASTC_2D_5X4_SRGB = 57, | 75 | ASTC_2D_8X5_SRGB = 57, |
| 76 | ASTC_2D_5X5 = 58, | 76 | ASTC_2D_5X4_SRGB = 58, |
| 77 | ASTC_2D_5X5_SRGB = 59, | 77 | ASTC_2D_5X5 = 59, |
| 78 | ASTC_2D_10X8 = 60, | 78 | ASTC_2D_5X5_SRGB = 60, |
| 79 | ASTC_2D_10X8_SRGB = 61, | 79 | ASTC_2D_10X8 = 61, |
| 80 | ASTC_2D_6X6 = 62, | 80 | ASTC_2D_10X8_SRGB = 62, |
| 81 | ASTC_2D_6X6_SRGB = 63, | 81 | ASTC_2D_6X6 = 63, |
| 82 | ASTC_2D_10X10 = 64, | 82 | ASTC_2D_6X6_SRGB = 64, |
| 83 | ASTC_2D_10X10_SRGB = 65, | 83 | ASTC_2D_10X10 = 65, |
| 84 | ASTC_2D_12X12 = 66, | 84 | ASTC_2D_10X10_SRGB = 66, |
| 85 | ASTC_2D_12X12_SRGB = 67, | 85 | ASTC_2D_12X12 = 67, |
| 86 | ASTC_2D_8X6 = 68, | 86 | ASTC_2D_12X12_SRGB = 68, |
| 87 | ASTC_2D_8X6_SRGB = 69, | 87 | ASTC_2D_8X6 = 69, |
| 88 | ASTC_2D_6X5 = 70, | 88 | ASTC_2D_8X6_SRGB = 70, |
| 89 | ASTC_2D_6X5_SRGB = 71, | 89 | ASTC_2D_6X5 = 71, |
| 90 | E5B9G9R9F = 72, | 90 | ASTC_2D_6X5_SRGB = 72, |
| 91 | E5B9G9R9F = 73, | ||
| 91 | 92 | ||
| 92 | MaxColorFormat, | 93 | MaxColorFormat, |
| 93 | 94 | ||
| 94 | // Depth formats | 95 | // Depth formats |
| 95 | Z32F = 73, | 96 | Z32F = 74, |
| 96 | Z16 = 74, | 97 | Z16 = 75, |
| 97 | 98 | ||
| 98 | MaxDepthFormat, | 99 | MaxDepthFormat, |
| 99 | 100 | ||
| 100 | // DepthStencil formats | 101 | // DepthStencil formats |
| 101 | Z24S8 = 75, | 102 | Z24S8 = 76, |
| 102 | S8Z24 = 76, | 103 | S8Z24 = 77, |
| 103 | Z32FS8 = 77, | 104 | Z32FS8 = 78, |
| 104 | 105 | ||
| 105 | MaxDepthStencilFormat, | 106 | MaxDepthStencilFormat, |
| 106 | 107 | ||
| @@ -138,6 +139,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ | |||
| 138 | 0, // R8UI | 139 | 0, // R8UI |
| 139 | 0, // RGBA16F | 140 | 0, // RGBA16F |
| 140 | 0, // RGBA16U | 141 | 0, // RGBA16U |
| 142 | 0, // RGBA16S | ||
| 141 | 0, // RGBA16UI | 143 | 0, // RGBA16UI |
| 142 | 0, // R11FG11FB10F | 144 | 0, // R11FG11FB10F |
| 143 | 0, // RGBA32UI | 145 | 0, // RGBA32UI |
| @@ -235,6 +237,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ | |||
| 235 | 1, // R8UI | 237 | 1, // R8UI |
| 236 | 1, // RGBA16F | 238 | 1, // RGBA16F |
| 237 | 1, // RGBA16U | 239 | 1, // RGBA16U |
| 240 | 1, // RGBA16S | ||
| 238 | 1, // RGBA16UI | 241 | 1, // RGBA16UI |
| 239 | 1, // R11FG11FB10F | 242 | 1, // R11FG11FB10F |
| 240 | 1, // RGBA32UI | 243 | 1, // RGBA32UI |
| @@ -324,6 +327,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ | |||
| 324 | 1, // R8UI | 327 | 1, // R8UI |
| 325 | 1, // RGBA16F | 328 | 1, // RGBA16F |
| 326 | 1, // RGBA16U | 329 | 1, // RGBA16U |
| 330 | 1, // RGBA16S | ||
| 327 | 1, // RGBA16UI | 331 | 1, // RGBA16UI |
| 328 | 1, // R11FG11FB10F | 332 | 1, // R11FG11FB10F |
| 329 | 1, // RGBA32UI | 333 | 1, // RGBA32UI |
| @@ -413,6 +417,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ | |||
| 413 | 8, // R8UI | 417 | 8, // R8UI |
| 414 | 64, // RGBA16F | 418 | 64, // RGBA16F |
| 415 | 64, // RGBA16U | 419 | 64, // RGBA16U |
| 420 | 64, // RGBA16S | ||
| 416 | 64, // RGBA16UI | 421 | 64, // RGBA16UI |
| 417 | 32, // R11FG11FB10F | 422 | 32, // R11FG11FB10F |
| 418 | 128, // RGBA32UI | 423 | 128, // RGBA32UI |
| @@ -517,6 +522,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table | |||
| 517 | SurfaceCompression::None, // R8UI | 522 | SurfaceCompression::None, // R8UI |
| 518 | SurfaceCompression::None, // RGBA16F | 523 | SurfaceCompression::None, // RGBA16F |
| 519 | SurfaceCompression::None, // RGBA16U | 524 | SurfaceCompression::None, // RGBA16U |
| 525 | SurfaceCompression::None, // RGBA16S | ||
| 520 | SurfaceCompression::None, // RGBA16UI | 526 | SurfaceCompression::None, // RGBA16UI |
| 521 | SurfaceCompression::None, // R11FG11FB10F | 527 | SurfaceCompression::None, // R11FG11FB10F |
| 522 | SurfaceCompression::None, // RGBA32UI | 528 | SurfaceCompression::None, // RGBA32UI |
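
Because PixelFormat values are used directly as table indices, inserting RGBA16S at 10 shifts every later enumerator, which is why each per-format table above (compression shift, block width/height, bpp, compression type) gains a matching row at the same position. A sketch of the lookup pattern those tables serve; the header most likely already exposes an accessor for this, so BitsPerPixel below is purely illustrative:

    #include "video_core/surface.h"

    // Every table is indexed by the PixelFormat value, so a missing or misplaced
    // row makes every later format read its neighbour's data.
    constexpr u32 BitsPerPixel(VideoCore::Surface::PixelFormat format) {
        return VideoCore::Surface::bpp_table[static_cast<std::size_t>(format)];
    }

    static_assert(BitsPerPixel(VideoCore::Surface::PixelFormat::RGBA16S) == 64);
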
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index cc3ad8417..e151c26c4 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp | |||
| @@ -41,7 +41,7 @@ struct Table { | |||
| 41 | ComponentType alpha_component; | 41 | ComponentType alpha_component; |
| 42 | bool is_srgb; | 42 | bool is_srgb; |
| 43 | }; | 43 | }; |
| 44 | constexpr std::array<Table, 75> DefinitionTable = {{ | 44 | constexpr std::array<Table, 76> DefinitionTable = {{ |
| 45 | {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, | 45 | {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, |
| 46 | {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, | 46 | {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, |
| 47 | {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, | 47 | {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, |
| @@ -61,6 +61,7 @@ constexpr std::array<Table, 75> DefinitionTable = {{ | |||
| 61 | {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U}, | 61 | {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U}, |
| 62 | {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S}, | 62 | {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S}, |
| 63 | 63 | ||
| 64 | {TextureFormat::R16_G16_B16_A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RGBA16S}, | ||
| 64 | {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U}, | 65 | {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U}, |
| 65 | {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F}, | 66 | {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F}, |
| 66 | {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI}, | 67 | {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI}, |
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index f00839313..9931c5ef7 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp | |||
| @@ -113,8 +113,10 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta | |||
| 113 | params.height = tic.Height(); | 113 | params.height = tic.Height(); |
| 114 | params.depth = tic.Depth(); | 114 | params.depth = tic.Depth(); |
| 115 | params.pitch = params.is_tiled ? 0 : tic.Pitch(); | 115 | params.pitch = params.is_tiled ? 0 : tic.Pitch(); |
| 116 | if (params.target == SurfaceTarget::TextureCubemap || | 116 | if (params.target == SurfaceTarget::Texture2D && params.depth > 1) { |
| 117 | params.target == SurfaceTarget::TextureCubeArray) { | 117 | params.depth = 1; |
| 118 | } else if (params.target == SurfaceTarget::TextureCubemap || | ||
| 119 | params.target == SurfaceTarget::TextureCubeArray) { | ||
| 118 | params.depth *= 6; | 120 | params.depth *= 6; |
| 119 | } | 121 | } |
| 120 | params.num_levels = tic.max_mip_level + 1; | 122 | params.num_levels = tic.max_mip_level + 1; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 51373b687..6cdbe63d0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -104,6 +104,11 @@ public: | |||
| 104 | if (!cache_addr) { | 104 | if (!cache_addr) { |
| 105 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | 105 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); |
| 106 | } | 106 | } |
| 107 | |||
| 108 | if (!IsTypeCompatible(tic.texture_type, entry)) { | ||
| 109 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | ||
| 110 | } | ||
| 111 | |||
| 107 | const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; | 112 | const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; |
| 108 | const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); | 113 | const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); |
| 109 | if (guard_samplers) { | 114 | if (guard_samplers) { |
| @@ -914,13 +919,15 @@ private: | |||
| 914 | params.width = 1; | 919 | params.width = 1; |
| 915 | params.height = 1; | 920 | params.height = 1; |
| 916 | params.depth = 1; | 921 | params.depth = 1; |
| 922 | if (target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray) { | ||
| 923 | params.depth = 6; | ||
| 924 | } | ||
| 917 | params.pitch = 4; | 925 | params.pitch = 4; |
| 918 | params.num_levels = 1; | 926 | params.num_levels = 1; |
| 919 | params.emulated_levels = 1; | 927 | params.emulated_levels = 1; |
| 920 | params.pixel_format = VideoCore::Surface::PixelFormat::RGBA16F; | 928 | params.pixel_format = VideoCore::Surface::PixelFormat::R8U; |
| 921 | params.type = VideoCore::Surface::SurfaceType::ColorTexture; | 929 | params.type = VideoCore::Surface::SurfaceType::ColorTexture; |
| 922 | auto surface = CreateSurface(0ULL, params); | 930 | auto surface = CreateSurface(0ULL, params); |
| 923 | invalid_memory.clear(); | ||
| 924 | invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); | 931 | invalid_memory.resize(surface->GetHostSizeInBytes(), 0U); |
| 925 | surface->UploadTexture(invalid_memory); | 932 | surface->UploadTexture(invalid_memory); |
| 926 | surface->MarkAsModified(false, Tick()); | 933 | surface->MarkAsModified(false, Tick()); |
| @@ -1082,6 +1089,36 @@ private: | |||
| 1082 | return siblings_table[static_cast<std::size_t>(format)]; | 1089 | return siblings_table[static_cast<std::size_t>(format)]; |
| 1083 | } | 1090 | } |
| 1084 | 1091 | ||
| 1092 | /// Returns true if the shader sampler entry is compatible with the TIC texture type. | ||
| 1093 | static bool IsTypeCompatible(Tegra::Texture::TextureType tic_type, | ||
| 1094 | const VideoCommon::Shader::Sampler& entry) { | ||
| 1095 | const auto shader_type = entry.GetType(); | ||
| 1096 | switch (tic_type) { | ||
| 1097 | case Tegra::Texture::TextureType::Texture1D: | ||
| 1098 | case Tegra::Texture::TextureType::Texture1DArray: | ||
| 1099 | return shader_type == Tegra::Shader::TextureType::Texture1D; | ||
| 1100 | case Tegra::Texture::TextureType::Texture1DBuffer: | ||
| 1101 | // TODO(Rodrigo): Assume as valid for now | ||
| 1102 | return true; | ||
| 1103 | case Tegra::Texture::TextureType::Texture2D: | ||
| 1104 | case Tegra::Texture::TextureType::Texture2DNoMipmap: | ||
| 1105 | return shader_type == Tegra::Shader::TextureType::Texture2D; | ||
| 1106 | case Tegra::Texture::TextureType::Texture2DArray: | ||
| 1107 | return shader_type == Tegra::Shader::TextureType::Texture2D || | ||
| 1108 | shader_type == Tegra::Shader::TextureType::TextureCube; | ||
| 1109 | case Tegra::Texture::TextureType::Texture3D: | ||
| 1110 | return shader_type == Tegra::Shader::TextureType::Texture3D; | ||
| 1111 | case Tegra::Texture::TextureType::TextureCubeArray: | ||
| 1112 | case Tegra::Texture::TextureType::TextureCubemap: | ||
| 1113 | if (shader_type == Tegra::Shader::TextureType::TextureCube) { | ||
| 1114 | return true; | ||
| 1115 | } | ||
| 1116 | return shader_type == Tegra::Shader::TextureType::Texture2D && entry.IsArray(); | ||
| 1117 | } | ||
| 1118 | UNREACHABLE(); | ||
| 1119 | return true; | ||
| 1120 | } | ||
| 1121 | |||
| 1085 | struct FramebufferTargetInfo { | 1122 | struct FramebufferTargetInfo { |
| 1086 | TSurface target; | 1123 | TSurface target; |
| 1087 | TView view; | 1124 | TView view; |
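
The IsTypeCompatible guard added above rejects a surface when the TIC texture type and the shader's sampler declaration disagree, with one deliberate allowance: cubemaps and cube arrays are also accepted through a 2D array sampler. A self-contained restatement of that branch for illustration; the mock enums below stand in for Tegra::Texture::TextureType and Tegra::Shader::TextureType and this is not the cache's private member function:

    enum class TicType { Cubemap, CubeArray, Texture3D };
    enum class SamplerType { Texture2D, Texture3D, TextureCube };

    constexpr bool CubeCompatible(TicType tic, SamplerType sampler, bool sampler_is_array) {
        if (tic == TicType::Texture3D) {
            return sampler == SamplerType::Texture3D;
        }
        // Cubemap / cube array: a cube sampler always matches; a 2D sampler only if it is an array.
        return sampler == SamplerType::TextureCube ||
               (sampler == SamplerType::Texture2D && sampler_is_array);
    }

    static_assert(CubeCompatible(TicType::CubeArray, SamplerType::Texture2D, true));
    static_assert(!CubeCompatible(TicType::Cubemap, SamplerType::Texture2D, false));
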
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 33bd31865..062b4f252 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp | |||
| @@ -17,26 +17,37 @@ | |||
| 17 | 17 | ||
| 18 | #include <algorithm> | 18 | #include <algorithm> |
| 19 | #include <cassert> | 19 | #include <cassert> |
| 20 | #include <cstdint> | ||
| 21 | #include <cstring> | 20 | #include <cstring> |
| 22 | #include <vector> | 21 | #include <vector> |
| 23 | 22 | ||
| 23 | #include "common/common_types.h" | ||
| 24 | |||
| 24 | #include "video_core/textures/astc.h" | 25 | #include "video_core/textures/astc.h" |
| 25 | 26 | ||
| 27 | namespace { | ||
| 28 | |||
| 29 | /// Count the number of bits set in a number. | ||
| 30 | constexpr u32 Popcnt(u32 n) { | ||
| 31 | u32 c = 0; | ||
| 32 | for (; n; c++) { | ||
| 33 | n &= n - 1; | ||
| 34 | } | ||
| 35 | return c; | ||
| 36 | } | ||
| 37 | |||
| 38 | } // Anonymous namespace | ||
| 39 | |||
| 26 | class InputBitStream { | 40 | class InputBitStream { |
| 27 | public: | 41 | public: |
| 28 | explicit InputBitStream(const unsigned char* ptr, int start_offset = 0) | 42 | explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0) |
| 29 | : m_CurByte(ptr), m_NextBit(start_offset % 8) {} | 43 | : m_CurByte(ptr), m_NextBit(start_offset % 8) {} |
| 30 | 44 | ||
| 31 | ~InputBitStream() = default; | 45 | std::size_t GetBitsRead() const { |
| 32 | |||
| 33 | int GetBitsRead() const { | ||
| 34 | return m_BitsRead; | 46 | return m_BitsRead; |
| 35 | } | 47 | } |
| 36 | 48 | ||
| 37 | int ReadBit() { | 49 | u32 ReadBit() { |
| 38 | 50 | u32 bit = *m_CurByte >> m_NextBit++; | |
| 39 | int bit = *m_CurByte >> m_NextBit++; | ||
| 40 | while (m_NextBit >= 8) { | 51 | while (m_NextBit >= 8) { |
| 41 | m_NextBit -= 8; | 52 | m_NextBit -= 8; |
| 42 | m_CurByte++; | 53 | m_CurByte++; |
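
The new Popcnt helper above clears the lowest set bit once per iteration (Kernighan's method), so it agrees with C++20's std::popcount. A quick cross-check sketch, not part of the patch:

    #include <bit>      // std::popcount, C++20
    #include <cstdint>

    constexpr std::uint32_t PopcntSketch(std::uint32_t n) {
        std::uint32_t c = 0;
        for (; n; c++) {
            n &= n - 1; // clears the lowest set bit
        }
        return c;
    }

    static_assert(PopcntSketch(0b1011'0100u) == 4);
    static_assert(PopcntSketch(31u) == std::popcount(31u)); // 5 bits, used for range sizes
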
| @@ -46,57 +57,66 @@ public: | |||
| 46 | return bit & 1; | 57 | return bit & 1; |
| 47 | } | 58 | } |
| 48 | 59 | ||
| 49 | unsigned int ReadBits(unsigned int nBits) { | 60 | u32 ReadBits(std::size_t nBits) { |
| 50 | unsigned int ret = 0; | 61 | u32 ret = 0; |
| 51 | for (unsigned int i = 0; i < nBits; i++) { | 62 | for (std::size_t i = 0; i < nBits; ++i) { |
| 63 | ret |= (ReadBit() & 1) << i; | ||
| 64 | } | ||
| 65 | return ret; | ||
| 66 | } | ||
| 67 | |||
| 68 | template <std::size_t nBits> | ||
| 69 | u32 ReadBits() { | ||
| 70 | u32 ret = 0; | ||
| 71 | for (std::size_t i = 0; i < nBits; ++i) { | ||
| 52 | ret |= (ReadBit() & 1) << i; | 72 | ret |= (ReadBit() & 1) << i; |
| 53 | } | 73 | } |
| 54 | return ret; | 74 | return ret; |
| 55 | } | 75 | } |
| 56 | 76 | ||
| 57 | private: | 77 | private: |
| 58 | const unsigned char* m_CurByte; | 78 | const u8* m_CurByte; |
| 59 | int m_NextBit = 0; | 79 | std::size_t m_NextBit = 0; |
| 60 | int m_BitsRead = 0; | 80 | std::size_t m_BitsRead = 0; |
| 61 | }; | 81 | }; |
| 62 | 82 | ||
| 63 | class OutputBitStream { | 83 | class OutputBitStream { |
| 64 | public: | 84 | public: |
| 65 | explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) | 85 | explicit OutputBitStream(u8* ptr, s32 nBits = 0, s32 start_offset = 0) |
| 66 | : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} | 86 | : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} |
| 67 | 87 | ||
| 68 | ~OutputBitStream() = default; | 88 | ~OutputBitStream() = default; |
| 69 | 89 | ||
| 70 | int GetBitsWritten() const { | 90 | s32 GetBitsWritten() const { |
| 71 | return m_BitsWritten; | 91 | return m_BitsWritten; |
| 72 | } | 92 | } |
| 73 | 93 | ||
| 74 | void WriteBitsR(unsigned int val, unsigned int nBits) { | 94 | void WriteBitsR(u32 val, u32 nBits) { |
| 75 | for (unsigned int i = 0; i < nBits; i++) { | 95 | for (u32 i = 0; i < nBits; i++) { |
| 76 | WriteBit((val >> (nBits - i - 1)) & 1); | 96 | WriteBit((val >> (nBits - i - 1)) & 1); |
| 77 | } | 97 | } |
| 78 | } | 98 | } |
| 79 | 99 | ||
| 80 | void WriteBits(unsigned int val, unsigned int nBits) { | 100 | void WriteBits(u32 val, u32 nBits) { |
| 81 | for (unsigned int i = 0; i < nBits; i++) { | 101 | for (u32 i = 0; i < nBits; i++) { |
| 82 | WriteBit((val >> i) & 1); | 102 | WriteBit((val >> i) & 1); |
| 83 | } | 103 | } |
| 84 | } | 104 | } |
| 85 | 105 | ||
| 86 | private: | 106 | private: |
| 87 | void WriteBit(int b) { | 107 | void WriteBit(s32 b) { |
| 88 | 108 | ||
| 89 | if (done) | 109 | if (done) |
| 90 | return; | 110 | return; |
| 91 | 111 | ||
| 92 | const unsigned int mask = 1 << m_NextBit++; | 112 | const u32 mask = 1 << m_NextBit++; |
| 93 | 113 | ||
| 94 | // clear the bit | 114 | // clear the bit |
| 95 | *m_CurByte &= static_cast<unsigned char>(~mask); | 115 | *m_CurByte &= static_cast<u8>(~mask); |
| 96 | 116 | ||
| 97 | // Write the bit, if necessary | 117 | // Write the bit, if necessary |
| 98 | if (b) | 118 | if (b) |
| 99 | *m_CurByte |= static_cast<unsigned char>(mask); | 119 | *m_CurByte |= static_cast<u8>(mask); |
| 100 | 120 | ||
| 101 | // Next byte? | 121 | // Next byte? |
| 102 | if (m_NextBit >= 8) { | 122 | if (m_NextBit >= 8) { |
| @@ -107,10 +127,10 @@ private: | |||
| 107 | done = done || ++m_BitsWritten >= m_NumBits; | 127 | done = done || ++m_BitsWritten >= m_NumBits; |
| 108 | } | 128 | } |
| 109 | 129 | ||
| 110 | int m_BitsWritten = 0; | 130 | s32 m_BitsWritten = 0; |
| 111 | const int m_NumBits; | 131 | const s32 m_NumBits; |
| 112 | unsigned char* m_CurByte; | 132 | u8* m_CurByte; |
| 113 | int m_NextBit = 0; | 133 | s32 m_NextBit = 0; |
| 114 | 134 | ||
| 115 | bool done = false; | 135 | bool done = false; |
| 116 | }; | 136 | }; |
| @@ -123,20 +143,20 @@ public: | |||
| 123 | Bits(const Bits&) = delete; | 143 | Bits(const Bits&) = delete; |
| 124 | Bits& operator=(const Bits&) = delete; | 144 | Bits& operator=(const Bits&) = delete; |
| 125 | 145 | ||
| 126 | uint8_t operator[](uint32_t bitPos) const { | 146 | u8 operator[](u32 bitPos) const { |
| 127 | return static_cast<uint8_t>((m_Bits >> bitPos) & 1); | 147 | return static_cast<u8>((m_Bits >> bitPos) & 1); |
| 128 | } | 148 | } |
| 129 | 149 | ||
| 130 | IntType operator()(uint32_t start, uint32_t end) const { | 150 | IntType operator()(u32 start, u32 end) const { |
| 131 | if (start == end) { | 151 | if (start == end) { |
| 132 | return (*this)[start]; | 152 | return (*this)[start]; |
| 133 | } else if (start > end) { | 153 | } else if (start > end) { |
| 134 | uint32_t t = start; | 154 | u32 t = start; |
| 135 | start = end; | 155 | start = end; |
| 136 | end = t; | 156 | end = t; |
| 137 | } | 157 | } |
| 138 | 158 | ||
| 139 | uint64_t mask = (1 << (end - start + 1)) - 1; | 159 | u64 mask = (1 << (end - start + 1)) - 1; |
| 140 | return (m_Bits >> start) & static_cast<IntType>(mask); | 160 | return (m_Bits >> start) & static_cast<IntType>(mask); |
| 141 | } | 161 | } |
| 142 | 162 | ||
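
Bits<> above is a thin view over an integer: operator[] reads one bit and operator()(start, end) reads the inclusive range, swapping the bounds when they are given in reverse. The same extraction re-derived with plain shifts, since the class itself is file-local to astc.cpp:

    #include <cstdint>

    constexpr std::uint32_t BitRange(std::uint32_t bits, std::uint32_t start, std::uint32_t end) {
        if (start > end) {
            const std::uint32_t tmp = start;
            start = end;
            end = tmp;
        }
        const std::uint32_t mask = (1u << (end - start + 1)) - 1;
        return (bits >> start) & mask;
    }

    static_assert(BitRange(0b1101'0010u, 4, 7) == 0b1101); // bits 7..4
    static_assert(BitRange(0b1101'0010u, 7, 4) == 0b1101); // reversed bounds, same result
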
| @@ -144,273 +164,236 @@ private: | |||
| 144 | const IntType& m_Bits; | 164 | const IntType& m_Bits; |
| 145 | }; | 165 | }; |
| 146 | 166 | ||
| 147 | enum EIntegerEncoding { eIntegerEncoding_JustBits, eIntegerEncoding_Quint, eIntegerEncoding_Trit }; | 167 | enum class IntegerEncoding { JustBits, Qus32, Trit }; |
| 148 | |||
| 149 | class IntegerEncodedValue { | ||
| 150 | private: | ||
| 151 | const EIntegerEncoding m_Encoding; | ||
| 152 | const uint32_t m_NumBits; | ||
| 153 | uint32_t m_BitValue; | ||
| 154 | union { | ||
| 155 | uint32_t m_QuintValue; | ||
| 156 | uint32_t m_TritValue; | ||
| 157 | }; | ||
| 158 | 168 | ||
| 159 | public: | 169 | struct IntegerEncodedValue { |
| 160 | // Jank, but we're not doing any heavy lifting in this class, so it's | 170 | constexpr IntegerEncodedValue() = default; |
| 161 | // probably OK. It allows us to use these in std::vectors... | ||
| 162 | IntegerEncodedValue& operator=(const IntegerEncodedValue& other) { | ||
| 163 | new (this) IntegerEncodedValue(other); | ||
| 164 | return *this; | ||
| 165 | } | ||
| 166 | 171 | ||
| 167 | IntegerEncodedValue(EIntegerEncoding encoding, uint32_t numBits) | 172 | constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_) |
| 168 | : m_Encoding(encoding), m_NumBits(numBits) {} | 173 | : encoding{encoding_}, num_bits{num_bits_} {} |
| 169 | 174 | ||
| 170 | EIntegerEncoding GetEncoding() const { | 175 | constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const { |
| 171 | return m_Encoding; | 176 | return encoding == other.encoding && num_bits == other.num_bits; |
| 172 | } | ||
| 173 | uint32_t BaseBitLength() const { | ||
| 174 | return m_NumBits; | ||
| 175 | } | ||
| 176 | |||
| 177 | uint32_t GetBitValue() const { | ||
| 178 | return m_BitValue; | ||
| 179 | } | ||
| 180 | void SetBitValue(uint32_t val) { | ||
| 181 | m_BitValue = val; | ||
| 182 | } | ||
| 183 | |||
| 184 | uint32_t GetTritValue() const { | ||
| 185 | return m_TritValue; | ||
| 186 | } | ||
| 187 | void SetTritValue(uint32_t val) { | ||
| 188 | m_TritValue = val; | ||
| 189 | } | ||
| 190 | |||
| 191 | uint32_t GetQuintValue() const { | ||
| 192 | return m_QuintValue; | ||
| 193 | } | ||
| 194 | void SetQuintValue(uint32_t val) { | ||
| 195 | m_QuintValue = val; | ||
| 196 | } | ||
| 197 | |||
| 198 | bool MatchesEncoding(const IntegerEncodedValue& other) const { | ||
| 199 | return m_Encoding == other.m_Encoding && m_NumBits == other.m_NumBits; | ||
| 200 | } | 177 | } |
| 201 | 178 | ||
| 202 | // Returns the number of bits required to encode nVals values. | 179 | // Returns the number of bits required to encode nVals values. |
| 203 | uint32_t GetBitLength(uint32_t nVals) const { | 180 | u32 GetBitLength(u32 nVals) const { |
| 204 | uint32_t totalBits = m_NumBits * nVals; | 181 | u32 totalBits = num_bits * nVals; |
| 205 | if (m_Encoding == eIntegerEncoding_Trit) { | 182 | if (encoding == IntegerEncoding::Trit) { |
| 206 | totalBits += (nVals * 8 + 4) / 5; | 183 | totalBits += (nVals * 8 + 4) / 5; |
| 207 | } else if (m_Encoding == eIntegerEncoding_Quint) { | 184 | } else if (encoding == IntegerEncoding::Qus32) { |
| 208 | totalBits += (nVals * 7 + 2) / 3; | 185 | totalBits += (nVals * 7 + 2) / 3; |
| 209 | } | 186 | } |
| 210 | return totalBits; | 187 | return totalBits; |
| 211 | } | 188 | } |
| 212 | 189 | ||
| 213 | // Count the number of bits set in a number. | 190 | IntegerEncoding encoding{}; |
| 214 | static inline uint32_t Popcnt(uint32_t n) { | 191 | u32 num_bits = 0; |
| 215 | uint32_t c; | 192 | u32 bit_value = 0; |
| 216 | for (c = 0; n; c++) { | 193 | union { |
| 217 | n &= n - 1; | 194 | u32 qus32_value = 0; |
| 195 | u32 trit_value; | ||
| 196 | }; | ||
| 197 | }; | ||
| 198 | |||
| 199 | static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, | ||
| 200 | u32 nBitsPerValue) { | ||
| 201 | // Implement the algorithm in section C.2.12 | ||
| 202 | u32 m[5]; | ||
| 203 | u32 t[5]; | ||
| 204 | u32 T; | ||
| 205 | |||
| 206 | // Read the trit encoded block according to | ||
| 207 | // table C.2.14 | ||
| 208 | m[0] = bits.ReadBits(nBitsPerValue); | ||
| 209 | T = bits.ReadBits<2>(); | ||
| 210 | m[1] = bits.ReadBits(nBitsPerValue); | ||
| 211 | T |= bits.ReadBits<2>() << 2; | ||
| 212 | m[2] = bits.ReadBits(nBitsPerValue); | ||
| 213 | T |= bits.ReadBit() << 4; | ||
| 214 | m[3] = bits.ReadBits(nBitsPerValue); | ||
| 215 | T |= bits.ReadBits<2>() << 5; | ||
| 216 | m[4] = bits.ReadBits(nBitsPerValue); | ||
| 217 | T |= bits.ReadBit() << 7; | ||
| 218 | |||
| 219 | u32 C = 0; | ||
| 220 | |||
| 221 | Bits<u32> Tb(T); | ||
| 222 | if (Tb(2, 4) == 7) { | ||
| 223 | C = (Tb(5, 7) << 2) | Tb(0, 1); | ||
| 224 | t[4] = t[3] = 2; | ||
| 225 | } else { | ||
| 226 | C = Tb(0, 4); | ||
| 227 | if (Tb(5, 6) == 3) { | ||
| 228 | t[4] = 2; | ||
| 229 | t[3] = Tb[7]; | ||
| 230 | } else { | ||
| 231 | t[4] = Tb[7]; | ||
| 232 | t[3] = Tb(5, 6); | ||
| 218 | } | 233 | } |
| 219 | return c; | ||
| 220 | } | 234 | } |
| 221 | 235 | ||
| 222 | // Returns a new instance of this struct that corresponds to the | 236 | Bits<u32> Cb(C); |
| 223 | // can take no more than maxval values | 237 | if (Cb(0, 1) == 3) { |
| 224 | static IntegerEncodedValue CreateEncoding(uint32_t maxVal) { | 238 | t[2] = 2; |
| 225 | while (maxVal > 0) { | 239 | t[1] = Cb[4]; |
| 226 | uint32_t check = maxVal + 1; | 240 | t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]); |
| 227 | 241 | } else if (Cb(2, 3) == 3) { | |
| 228 | // Is maxVal a power of two? | 242 | t[2] = 2; |
| 229 | if (!(check & (check - 1))) { | 243 | t[1] = 2; |
| 230 | return IntegerEncodedValue(eIntegerEncoding_JustBits, Popcnt(maxVal)); | 244 | t[0] = Cb(0, 1); |
| 231 | } | 245 | } else { |
| 232 | 246 | t[2] = Cb[4]; | |
| 233 | // Is maxVal of the type 3*2^n - 1? | 247 | t[1] = Cb(2, 3); |
| 234 | if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { | 248 | t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]); |
| 235 | return IntegerEncodedValue(eIntegerEncoding_Trit, Popcnt(check / 3 - 1)); | 249 | } |
| 236 | } | ||
| 237 | 250 | ||
| 238 | // Is maxVal of the type 5*2^n - 1? | 251 | for (std::size_t i = 0; i < 5; ++i) { |
| 239 | if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { | 252 | IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Trit, nBitsPerValue); |
| 240 | return IntegerEncodedValue(eIntegerEncoding_Quint, Popcnt(check / 5 - 1)); | 253 | val.bit_value = m[i]; |
| 241 | } | 254 | val.trit_value = t[i]; |
| 255 | } | ||
| 256 | } | ||
| 242 | 257 | ||
| 243 | // Apparently it can't be represented with a bounded integer sequence... | 258 | static void DecodeQus32Block(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, |
| 244 | // just iterate. | 259 | u32 nBitsPerValue) { |
| 245 | maxVal--; | 260 | // Implement the algorithm in section C.2.12 |
| 261 | u32 m[3]; | ||
| 262 | u32 q[3]; | ||
| 263 | u32 Q; | ||
| 264 | |||
| 265 | // Read the quint encoded block according to | ||
| 266 | // table C.2.15 | ||
| 267 | m[0] = bits.ReadBits(nBitsPerValue); | ||
| 268 | Q = bits.ReadBits<3>(); | ||
| 269 | m[1] = bits.ReadBits(nBitsPerValue); | ||
| 270 | Q |= bits.ReadBits<2>() << 3; | ||
| 271 | m[2] = bits.ReadBits(nBitsPerValue); | ||
| 272 | Q |= bits.ReadBits<2>() << 5; | ||
| 273 | |||
| 274 | Bits<u32> Qb(Q); | ||
| 275 | if (Qb(1, 2) == 3 && Qb(5, 6) == 0) { | ||
| 276 | q[0] = q[1] = 4; | ||
| 277 | q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]); | ||
| 278 | } else { | ||
| 279 | u32 C = 0; | ||
| 280 | if (Qb(1, 2) == 3) { | ||
| 281 | q[2] = 4; | ||
| 282 | C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0]; | ||
| 283 | } else { | ||
| 284 | q[2] = Qb(5, 6); | ||
| 285 | C = Qb(0, 4); | ||
| 246 | } | 286 | } |
| 247 | return IntegerEncodedValue(eIntegerEncoding_JustBits, 0); | ||
| 248 | } | ||
| 249 | |||
| 250 | // Fills result with the values that are encoded in the given | ||
| 251 | // bitstream. We must know beforehand what the maximum possible | ||
| 252 | // value is, and how many values we're decoding. | ||
| 253 | static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, | ||
| 254 | InputBitStream& bits, uint32_t maxRange, uint32_t nValues) { | ||
| 255 | // Determine encoding parameters | ||
| 256 | IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange); | ||
| 257 | |||
| 258 | // Start decoding | ||
| 259 | uint32_t nValsDecoded = 0; | ||
| 260 | while (nValsDecoded < nValues) { | ||
| 261 | switch (val.GetEncoding()) { | ||
| 262 | case eIntegerEncoding_Quint: | ||
| 263 | DecodeQuintBlock(bits, result, val.BaseBitLength()); | ||
| 264 | nValsDecoded += 3; | ||
| 265 | break; | ||
| 266 | 287 | ||
| 267 | case eIntegerEncoding_Trit: | 288 | Bits<u32> Cb(C); |
| 268 | DecodeTritBlock(bits, result, val.BaseBitLength()); | 289 | if (Cb(0, 2) == 5) { |
| 269 | nValsDecoded += 5; | 290 | q[1] = 4; |
| 270 | break; | 291 | q[0] = Cb(3, 4); |
| 271 | 292 | } else { | |
| 272 | case eIntegerEncoding_JustBits: | 293 | q[1] = Cb(3, 4); |
| 273 | val.SetBitValue(bits.ReadBits(val.BaseBitLength())); | 294 | q[0] = Cb(0, 2); |
| 274 | result.push_back(val); | ||
| 275 | nValsDecoded++; | ||
| 276 | break; | ||
| 277 | } | ||
| 278 | } | 295 | } |
| 279 | } | 296 | } |
| 280 | 297 | ||
| 281 | private: | 298 | for (std::size_t i = 0; i < 3; ++i) { |
| 282 | static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, | 299 | IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Qus32, nBitsPerValue); |
| 283 | uint32_t nBitsPerValue) { | 300 | val.bit_value = m[i]; |
| 284 | // Implement the algorithm in section C.2.12 | 301 | val.qus32_value = q[i]; |
| 285 | uint32_t m[5]; | 302 | } |
| 286 | uint32_t t[5]; | 303 | } |
| 287 | uint32_t T; | 304 | |
| 288 | 305 | // Returns a new instance of this struct that corresponds to the | |
| 289 | // Read the trit encoded block according to | 306 | // can take no more than maxval values |
| 290 | // table C.2.14 | 307 | static constexpr IntegerEncodedValue CreateEncoding(u32 maxVal) { |
| 291 | m[0] = bits.ReadBits(nBitsPerValue); | 308 | while (maxVal > 0) { |
| 292 | T = bits.ReadBits(2); | 309 | u32 check = maxVal + 1; |
| 293 | m[1] = bits.ReadBits(nBitsPerValue); | 310 | |
| 294 | T |= bits.ReadBits(2) << 2; | 311 | // Is maxVal a power of two? |
| 295 | m[2] = bits.ReadBits(nBitsPerValue); | 312 | if (!(check & (check - 1))) { |
| 296 | T |= bits.ReadBit() << 4; | 313 | return IntegerEncodedValue(IntegerEncoding::JustBits, Popcnt(maxVal)); |
| 297 | m[3] = bits.ReadBits(nBitsPerValue); | ||
| 298 | T |= bits.ReadBits(2) << 5; | ||
| 299 | m[4] = bits.ReadBits(nBitsPerValue); | ||
| 300 | T |= bits.ReadBit() << 7; | ||
| 301 | |||
| 302 | uint32_t C = 0; | ||
| 303 | |||
| 304 | Bits<uint32_t> Tb(T); | ||
| 305 | if (Tb(2, 4) == 7) { | ||
| 306 | C = (Tb(5, 7) << 2) | Tb(0, 1); | ||
| 307 | t[4] = t[3] = 2; | ||
| 308 | } else { | ||
| 309 | C = Tb(0, 4); | ||
| 310 | if (Tb(5, 6) == 3) { | ||
| 311 | t[4] = 2; | ||
| 312 | t[3] = Tb[7]; | ||
| 313 | } else { | ||
| 314 | t[4] = Tb[7]; | ||
| 315 | t[3] = Tb(5, 6); | ||
| 316 | } | ||
| 317 | } | 314 | } |
| 318 | 315 | ||
| 319 | Bits<uint32_t> Cb(C); | 316 | // Is maxVal of the type 3*2^n - 1? |
| 320 | if (Cb(0, 1) == 3) { | 317 | if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { |
| 321 | t[2] = 2; | 318 | return IntegerEncodedValue(IntegerEncoding::Trit, Popcnt(check / 3 - 1)); |
| 322 | t[1] = Cb[4]; | ||
| 323 | t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]); | ||
| 324 | } else if (Cb(2, 3) == 3) { | ||
| 325 | t[2] = 2; | ||
| 326 | t[1] = 2; | ||
| 327 | t[0] = Cb(0, 1); | ||
| 328 | } else { | ||
| 329 | t[2] = Cb[4]; | ||
| 330 | t[1] = Cb(2, 3); | ||
| 331 | t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]); | ||
| 332 | } | 319 | } |
| 333 | 320 | ||
| 334 | for (uint32_t i = 0; i < 5; i++) { | 321 | // Is maxVal of the type 5*2^n - 1? |
| 335 | IntegerEncodedValue val(eIntegerEncoding_Trit, nBitsPerValue); | 322 | if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { |
| 336 | val.SetBitValue(m[i]); | 323 | return IntegerEncodedValue(IntegerEncoding::Qus32, Popcnt(check / 5 - 1)); |
| 337 | val.SetTritValue(t[i]); | ||
| 338 | result.push_back(val); | ||
| 339 | } | 324 | } |
| 325 | |||
| 326 | // Apparently it can't be represented with a bounded integer sequence... | ||
| 327 | // just iterate. | ||
| 328 | maxVal--; | ||
| 340 | } | 329 | } |
| 330 | return IntegerEncodedValue(IntegerEncoding::JustBits, 0); | ||
| 331 | } | ||
| 341 | 332 | ||
| 342 | static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, | 333 | static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() { |
| 343 | uint32_t nBitsPerValue) { | 334 | std::array<IntegerEncodedValue, 256> encodings{}; |
| 344 | // Implement the algorithm in section C.2.12 | 335 | for (std::size_t i = 0; i < encodings.size(); ++i) { |
| 345 | uint32_t m[3]; | 336 | encodings[i] = CreateEncoding(static_cast<u32>(i)); |
| 346 | uint32_t q[3]; | 337 | } |
| 347 | uint32_t Q; | 338 | return encodings; |
| 348 | 339 | } | |
| 349 | // Read the trit encoded block according to | ||
| 350 | // table C.2.15 | ||
| 351 | m[0] = bits.ReadBits(nBitsPerValue); | ||
| 352 | Q = bits.ReadBits(3); | ||
| 353 | m[1] = bits.ReadBits(nBitsPerValue); | ||
| 354 | Q |= bits.ReadBits(2) << 3; | ||
| 355 | m[2] = bits.ReadBits(nBitsPerValue); | ||
| 356 | Q |= bits.ReadBits(2) << 5; | ||
| 357 | |||
| 358 | Bits<uint32_t> Qb(Q); | ||
| 359 | if (Qb(1, 2) == 3 && Qb(5, 6) == 0) { | ||
| 360 | q[0] = q[1] = 4; | ||
| 361 | q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]); | ||
| 362 | } else { | ||
| 363 | uint32_t C = 0; | ||
| 364 | if (Qb(1, 2) == 3) { | ||
| 365 | q[2] = 4; | ||
| 366 | C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0]; | ||
| 367 | } else { | ||
| 368 | q[2] = Qb(5, 6); | ||
| 369 | C = Qb(0, 4); | ||
| 370 | } | ||
| 371 | 340 | ||
| 372 | Bits<uint32_t> Cb(C); | 341 | static constexpr std::array EncodingsValues = MakeEncodedValues(); |
| 373 | if (Cb(0, 2) == 5) { | 342 | |
| 374 | q[1] = 4; | 343 | // Fills result with the values that are encoded in the given |
| 375 | q[0] = Cb(3, 4); | 344 | // bitstream. We must know beforehand what the maximum possible |
| 376 | } else { | 345 | // value is, and how many values we're decoding. |
| 377 | q[1] = Cb(3, 4); | 346 | static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, InputBitStream& bits, |
| 378 | q[0] = Cb(0, 2); | 347 | u32 maxRange, u32 nValues) { |
| 379 | } | 348 | // Determine encoding parameters |
| 380 | } | 349 | IntegerEncodedValue val = EncodingsValues[maxRange]; |
| 350 | |||
| 351 | // Start decoding | ||
| 352 | u32 nValsDecoded = 0; | ||
| 353 | while (nValsDecoded < nValues) { | ||
| 354 | switch (val.encoding) { | ||
| 355 | case IntegerEncoding::Qus32: | ||
| 356 | DecodeQus32Block(bits, result, val.num_bits); | ||
| 357 | nValsDecoded += 3; | ||
| 358 | break; | ||
| 359 | |||
| 360 | case IntegerEncoding::Trit: | ||
| 361 | DecodeTritBlock(bits, result, val.num_bits); | ||
| 362 | nValsDecoded += 5; | ||
| 363 | break; | ||
| 381 | 364 | ||
| 382 | for (uint32_t i = 0; i < 3; i++) { | 365 | case IntegerEncoding::JustBits: |
| 383 | IntegerEncodedValue val(eIntegerEncoding_Quint, nBitsPerValue); | 366 | val.bit_value = bits.ReadBits(val.num_bits); |
| 384 | val.m_BitValue = m[i]; | ||
| 385 | val.m_QuintValue = q[i]; | ||
| 386 | result.push_back(val); | 367 | result.push_back(val); |
| 368 | nValsDecoded++; | ||
| 369 | break; | ||
| 387 | } | 370 | } |
| 388 | } | 371 | } |
| 389 | }; | 372 | } |
| 390 | 373 | ||
| 391 | namespace ASTCC { | 374 | namespace ASTCC { |
| 392 | 375 | ||
| 393 | struct TexelWeightParams { | 376 | struct TexelWeightParams { |
| 394 | uint32_t m_Width = 0; | 377 | u32 m_Width = 0; |
| 395 | uint32_t m_Height = 0; | 378 | u32 m_Height = 0; |
| 396 | bool m_bDualPlane = false; | 379 | bool m_bDualPlane = false; |
| 397 | uint32_t m_MaxWeight = 0; | 380 | u32 m_MaxWeight = 0; |
| 398 | bool m_bError = false; | 381 | bool m_bError = false; |
| 399 | bool m_bVoidExtentLDR = false; | 382 | bool m_bVoidExtentLDR = false; |
| 400 | bool m_bVoidExtentHDR = false; | 383 | bool m_bVoidExtentHDR = false; |
| 401 | 384 | ||
| 402 | uint32_t GetPackedBitSize() const { | 385 | u32 GetPackedBitSize() const { |
| 403 | // How many indices do we have? | 386 | // How many indices do we have? |
| 404 | uint32_t nIdxs = m_Height * m_Width; | 387 | u32 nIdxs = m_Height * m_Width; |
| 405 | if (m_bDualPlane) { | 388 | if (m_bDualPlane) { |
| 406 | nIdxs *= 2; | 389 | nIdxs *= 2; |
| 407 | } | 390 | } |
| 408 | 391 | ||
| 409 | return IntegerEncodedValue::CreateEncoding(m_MaxWeight).GetBitLength(nIdxs); | 392 | return EncodingsValues[m_MaxWeight].GetBitLength(nIdxs); |
| 410 | } | 393 | } |
| 411 | 394 | ||
| 412 | uint32_t GetNumWeightValues() const { | 395 | u32 GetNumWeightValues() const { |
| 413 | uint32_t ret = m_Width * m_Height; | 396 | u32 ret = m_Width * m_Height; |
| 414 | if (m_bDualPlane) { | 397 | if (m_bDualPlane) { |
| 415 | ret *= 2; | 398 | ret *= 2; |
| 416 | } | 399 | } |
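
CreateEncoding in the hunk above classifies a maximum value into one of the three bounded-integer-sequence encodings from ASTC spec C.2.12: plain bits when maxVal + 1 is a power of two, trits when it is 3 * 2^n, and quints (spelled Qus32 after the int -> s32 rename) when it is 5 * 2^n. Worked values for the EncodingsValues table that GetPackedBitSize indexes:

    maxVal = 31 -> check = 32 = 2^5     -> JustBits, 5 bits per value
    maxVal = 5  -> check = 6  = 3 * 2^1 -> Trit,     1 bit per value plus 8 packed trit bits per 5 values
    maxVal = 19 -> check = 20 = 5 * 2^2 -> Quint,    2 bits per value plus 7 packed quint bits per 3 values

    GetBitLength for 16 weights at maxVal = 5 (trit encoding):
        16 * 1 + (16 * 8 + 4) / 5 = 16 + 26 = 42 bits
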
| @@ -422,7 +405,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 422 | TexelWeightParams params; | 405 | TexelWeightParams params; |
| 423 | 406 | ||
| 424 | // Read the entire block mode all at once | 407 | // Read the entire block mode all at once |
| 425 | uint16_t modeBits = static_cast<uint16_t>(strm.ReadBits(11)); | 408 | u16 modeBits = static_cast<u16>(strm.ReadBits<11>()); |
| 426 | 409 | ||
| 427 | // Does this match the void extent block mode? | 410 | // Does this match the void extent block mode? |
| 428 | if ((modeBits & 0x01FF) == 0x1FC) { | 411 | if ((modeBits & 0x01FF) == 0x1FC) { |
| @@ -457,7 +440,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 457 | // of the block mode. Layout is determined by a number | 440 | // of the block mode. Layout is determined by a number |
| 458 | // between 0 and 9 corresponding to table C.2.8 of the | 441 | // between 0 and 9 corresponding to table C.2.8 of the |
| 459 | // ASTC spec. | 442 | // ASTC spec. |
| 460 | uint32_t layout = 0; | 443 | u32 layout = 0; |
| 461 | 444 | ||
| 462 | if ((modeBits & 0x1) || (modeBits & 0x2)) { | 445 | if ((modeBits & 0x1) || (modeBits & 0x2)) { |
| 463 | // layout is in [0-4] | 446 | // layout is in [0-4] |
| @@ -509,7 +492,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 509 | assert(layout < 10); | 492 | assert(layout < 10); |
| 510 | 493 | ||
| 511 | // Determine R | 494 | // Determine R |
| 512 | uint32_t R = !!(modeBits & 0x10); | 495 | u32 R = !!(modeBits & 0x10); |
| 513 | if (layout < 5) { | 496 | if (layout < 5) { |
| 514 | R |= (modeBits & 0x3) << 1; | 497 | R |= (modeBits & 0x3) << 1; |
| 515 | } else { | 498 | } else { |
| @@ -520,54 +503,54 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 520 | // Determine width & height | 503 | // Determine width & height |
| 521 | switch (layout) { | 504 | switch (layout) { |
| 522 | case 0: { | 505 | case 0: { |
| 523 | uint32_t A = (modeBits >> 5) & 0x3; | 506 | u32 A = (modeBits >> 5) & 0x3; |
| 524 | uint32_t B = (modeBits >> 7) & 0x3; | 507 | u32 B = (modeBits >> 7) & 0x3; |
| 525 | params.m_Width = B + 4; | 508 | params.m_Width = B + 4; |
| 526 | params.m_Height = A + 2; | 509 | params.m_Height = A + 2; |
| 527 | break; | 510 | break; |
| 528 | } | 511 | } |
| 529 | 512 | ||
| 530 | case 1: { | 513 | case 1: { |
| 531 | uint32_t A = (modeBits >> 5) & 0x3; | 514 | u32 A = (modeBits >> 5) & 0x3; |
| 532 | uint32_t B = (modeBits >> 7) & 0x3; | 515 | u32 B = (modeBits >> 7) & 0x3; |
| 533 | params.m_Width = B + 8; | 516 | params.m_Width = B + 8; |
| 534 | params.m_Height = A + 2; | 517 | params.m_Height = A + 2; |
| 535 | break; | 518 | break; |
| 536 | } | 519 | } |
| 537 | 520 | ||
| 538 | case 2: { | 521 | case 2: { |
| 539 | uint32_t A = (modeBits >> 5) & 0x3; | 522 | u32 A = (modeBits >> 5) & 0x3; |
| 540 | uint32_t B = (modeBits >> 7) & 0x3; | 523 | u32 B = (modeBits >> 7) & 0x3; |
| 541 | params.m_Width = A + 2; | 524 | params.m_Width = A + 2; |
| 542 | params.m_Height = B + 8; | 525 | params.m_Height = B + 8; |
| 543 | break; | 526 | break; |
| 544 | } | 527 | } |
| 545 | 528 | ||
| 546 | case 3: { | 529 | case 3: { |
| 547 | uint32_t A = (modeBits >> 5) & 0x3; | 530 | u32 A = (modeBits >> 5) & 0x3; |
| 548 | uint32_t B = (modeBits >> 7) & 0x1; | 531 | u32 B = (modeBits >> 7) & 0x1; |
| 549 | params.m_Width = A + 2; | 532 | params.m_Width = A + 2; |
| 550 | params.m_Height = B + 6; | 533 | params.m_Height = B + 6; |
| 551 | break; | 534 | break; |
| 552 | } | 535 | } |
| 553 | 536 | ||
| 554 | case 4: { | 537 | case 4: { |
| 555 | uint32_t A = (modeBits >> 5) & 0x3; | 538 | u32 A = (modeBits >> 5) & 0x3; |
| 556 | uint32_t B = (modeBits >> 7) & 0x1; | 539 | u32 B = (modeBits >> 7) & 0x1; |
| 557 | params.m_Width = B + 2; | 540 | params.m_Width = B + 2; |
| 558 | params.m_Height = A + 2; | 541 | params.m_Height = A + 2; |
| 559 | break; | 542 | break; |
| 560 | } | 543 | } |
| 561 | 544 | ||
| 562 | case 5: { | 545 | case 5: { |
| 563 | uint32_t A = (modeBits >> 5) & 0x3; | 546 | u32 A = (modeBits >> 5) & 0x3; |
| 564 | params.m_Width = 12; | 547 | params.m_Width = 12; |
| 565 | params.m_Height = A + 2; | 548 | params.m_Height = A + 2; |
| 566 | break; | 549 | break; |
| 567 | } | 550 | } |
| 568 | 551 | ||
| 569 | case 6: { | 552 | case 6: { |
| 570 | uint32_t A = (modeBits >> 5) & 0x3; | 553 | u32 A = (modeBits >> 5) & 0x3; |
| 571 | params.m_Width = A + 2; | 554 | params.m_Width = A + 2; |
| 572 | params.m_Height = 12; | 555 | params.m_Height = 12; |
| 573 | break; | 556 | break; |
| @@ -586,15 +569,15 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 586 | } | 569 | } |
| 587 | 570 | ||
| 588 | case 9: { | 571 | case 9: { |
| 589 | uint32_t A = (modeBits >> 5) & 0x3; | 572 | u32 A = (modeBits >> 5) & 0x3; |
| 590 | uint32_t B = (modeBits >> 9) & 0x3; | 573 | u32 B = (modeBits >> 9) & 0x3; |
| 591 | params.m_Width = A + 6; | 574 | params.m_Width = A + 6; |
| 592 | params.m_Height = B + 6; | 575 | params.m_Height = B + 6; |
| 593 | break; | 576 | break; |
| 594 | } | 577 | } |
| 595 | 578 | ||
| 596 | default: | 579 | default: |
| 597 | assert(!"Don't know this layout..."); | 580 | assert(false && "Don't know this layout..."); |
| 598 | params.m_bError = true; | 581 | params.m_bError = true; |
| 599 | break; | 582 | break; |
| 600 | } | 583 | } |
| @@ -605,10 +588,10 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 605 | bool H = (layout != 9) && (modeBits & 0x200); | 588 | bool H = (layout != 9) && (modeBits & 0x200); |
| 606 | 589 | ||
| 607 | if (H) { | 590 | if (H) { |
| 608 | const uint32_t maxWeights[6] = {9, 11, 15, 19, 23, 31}; | 591 | const u32 maxWeights[6] = {9, 11, 15, 19, 23, 31}; |
| 609 | params.m_MaxWeight = maxWeights[R - 2]; | 592 | params.m_MaxWeight = maxWeights[R - 2]; |
| 610 | } else { | 593 | } else { |
| 611 | const uint32_t maxWeights[6] = {1, 2, 3, 4, 5, 7}; | 594 | const u32 maxWeights[6] = {1, 2, 3, 4, 5, 7}; |
| 612 | params.m_MaxWeight = maxWeights[R - 2]; | 595 | params.m_MaxWeight = maxWeights[R - 2]; |
| 613 | } | 596 | } |
| 614 | 597 | ||
| @@ -617,32 +600,32 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 617 | return params; | 600 | return params; |
| 618 | } | 601 | } |
| 619 | 602 | ||
| 620 | static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, | 603 | static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth, |
| 621 | uint32_t blockHeight) { | 604 | u32 blockHeight) { |
| 622 | // Don't actually care about the void extent, just read the bits... | 605 | // Don't actually care about the void extent, just read the bits... |
| 623 | for (int i = 0; i < 4; ++i) { | 606 | for (s32 i = 0; i < 4; ++i) { |
| 624 | strm.ReadBits(13); | 607 | strm.ReadBits<13>(); |
| 625 | } | 608 | } |
| 626 | 609 | ||
| 627 | // Decode the RGBA components and renormalize them to the range [0, 255] | 610 | // Decode the RGBA components and renormalize them to the range [0, 255] |
| 628 | uint16_t r = static_cast<uint16_t>(strm.ReadBits(16)); | 611 | u16 r = static_cast<u16>(strm.ReadBits<16>()); |
| 629 | uint16_t g = static_cast<uint16_t>(strm.ReadBits(16)); | 612 | u16 g = static_cast<u16>(strm.ReadBits<16>()); |
| 630 | uint16_t b = static_cast<uint16_t>(strm.ReadBits(16)); | 613 | u16 b = static_cast<u16>(strm.ReadBits<16>()); |
| 631 | uint16_t a = static_cast<uint16_t>(strm.ReadBits(16)); | 614 | u16 a = static_cast<u16>(strm.ReadBits<16>()); |
| 632 | 615 | ||
| 633 | uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 | | 616 | u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 | |
| 634 | (static_cast<uint32_t>(a) & 0xFF00) << 16; | 617 | (static_cast<u32>(a) & 0xFF00) << 16; |
| 635 | 618 | ||
| 636 | for (uint32_t j = 0; j < blockHeight; j++) { | 619 | for (u32 j = 0; j < blockHeight; j++) { |
| 637 | for (uint32_t i = 0; i < blockWidth; i++) { | 620 | for (u32 i = 0; i < blockWidth; i++) { |
| 638 | outBuf[j * blockWidth + i] = rgba; | 621 | outBuf[j * blockWidth + i] = rgba; |
| 639 | } | 622 | } |
| 640 | } | 623 | } |
| 641 | } | 624 | } |
| 642 | 625 | ||
| 643 | static void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeight) { | 626 | static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { |
| 644 | for (uint32_t j = 0; j < blockHeight; j++) { | 627 | for (u32 j = 0; j < blockHeight; j++) { |
| 645 | for (uint32_t i = 0; i < blockWidth; i++) { | 628 | for (u32 i = 0; i < blockWidth; i++) { |
| 646 | outBuf[j * blockWidth + i] = 0xFFFF00FF; | 629 | outBuf[j * blockWidth + i] = 0xFFFF00FF; |
| 647 | } | 630 | } |
| 648 | } | 631 | } |
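
FillVoidExtentLDR above keeps only the top 8 bits of each 16-bit channel and packs them with R in the lowest byte of the output word:

    byte 0 = r >> 8,  byte 1 = g >> 8,  byte 2 = b >> 8,  byte 3 = a >> 8
    e.g. r = 0xFF00, g = 0x8000, b = 0x0000, a = 0xFFFF  ->  rgba = 0xFF0080FF
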
| @@ -651,18 +634,18 @@ static void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeigh | |||
| 651 | // Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)] | 634 | // Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)] |
| 652 | // is the same as [(numBits - 1):0] and repeats all the way down. | 635 | // is the same as [(numBits - 1):0] and repeats all the way down. |
| 653 | template <typename IntType> | 636 | template <typename IntType> |
| 654 | static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) { | 637 | static IntType Replicate(IntType val, u32 numBits, u32 toBit) { |
| 655 | if (numBits == 0) | 638 | if (numBits == 0) |
| 656 | return 0; | 639 | return 0; |
| 657 | if (toBit == 0) | 640 | if (toBit == 0) |
| 658 | return 0; | 641 | return 0; |
| 659 | IntType v = val & static_cast<IntType>((1 << numBits) - 1); | 642 | IntType v = val & static_cast<IntType>((1 << numBits) - 1); |
| 660 | IntType res = v; | 643 | IntType res = v; |
| 661 | uint32_t reslen = numBits; | 644 | u32 reslen = numBits; |
| 662 | while (reslen < toBit) { | 645 | while (reslen < toBit) { |
| 663 | uint32_t comp = 0; | 646 | u32 comp = 0; |
| 664 | if (numBits > toBit - reslen) { | 647 | if (numBits > toBit - reslen) { |
| 665 | uint32_t newshift = toBit - reslen; | 648 | u32 newshift = toBit - reslen; |
| 666 | comp = numBits - newshift; | 649 | comp = numBits - newshift; |
| 667 | numBits = newshift; | 650 | numBits = newshift; |
| 668 | } | 651 | } |
| @@ -675,14 +658,14 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) { | |||
| 675 | 658 | ||
| 676 | class Pixel { | 659 | class Pixel { |
| 677 | protected: | 660 | protected: |
| 678 | using ChannelType = int16_t; | 661 | using ChannelType = s16; |
| 679 | uint8_t m_BitDepth[4] = {8, 8, 8, 8}; | 662 | u8 m_BitDepth[4] = {8, 8, 8, 8}; |
| 680 | int16_t color[4] = {}; | 663 | s16 color[4] = {}; |
| 681 | 664 | ||
| 682 | public: | 665 | public: |
| 683 | Pixel() = default; | 666 | Pixel() = default; |
| 684 | Pixel(uint32_t a, uint32_t r, uint32_t g, uint32_t b, unsigned bitDepth = 8) | 667 | Pixel(u32 a, u32 r, u32 g, u32 b, u32 bitDepth = 8) |
| 685 | : m_BitDepth{uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth)}, | 668 | : m_BitDepth{u8(bitDepth), u8(bitDepth), u8(bitDepth), u8(bitDepth)}, |
| 686 | color{static_cast<ChannelType>(a), static_cast<ChannelType>(r), | 669 | color{static_cast<ChannelType>(a), static_cast<ChannelType>(r), |
| 687 | static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {} | 670 | static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {} |
| 688 | 671 | ||
| @@ -691,22 +674,22 @@ public: | |||
| 691 | // significant bits when going from larger to smaller bit depth | 674 | // significant bits when going from larger to smaller bit depth |
| 692 | // or by repeating the most significant bits when going from | 675 | // or by repeating the most significant bits when going from |
| 693 | // smaller to larger bit depths. | 676 | // smaller to larger bit depths. |
| 694 | void ChangeBitDepth(const uint8_t (&depth)[4]) { | 677 | void ChangeBitDepth(const u8 (&depth)[4]) { |
| 695 | for (uint32_t i = 0; i < 4; i++) { | 678 | for (u32 i = 0; i < 4; i++) { |
| 696 | Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]); | 679 | Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]); |
| 697 | m_BitDepth[i] = depth[i]; | 680 | m_BitDepth[i] = depth[i]; |
| 698 | } | 681 | } |
| 699 | } | 682 | } |
| 700 | 683 | ||
| 701 | template <typename IntType> | 684 | template <typename IntType> |
| 702 | static float ConvertChannelToFloat(IntType channel, uint8_t bitDepth) { | 685 | static float ConvertChannelToFloat(IntType channel, u8 bitDepth) { |
| 703 | float denominator = static_cast<float>((1 << bitDepth) - 1); | 686 | float denominator = static_cast<float>((1 << bitDepth) - 1); |
| 704 | return static_cast<float>(channel) / denominator; | 687 | return static_cast<float>(channel) / denominator; |
| 705 | } | 688 | } |
| 706 | 689 | ||
| 707 | // Changes the bit depth of a single component. See the comment | 690 | // Changes the bit depth of a single component. See the comment |
| 708 | // above for how we do this. | 691 | // above for how we do this. |
| 709 | static ChannelType ChangeBitDepth(Pixel::ChannelType val, uint8_t oldDepth, uint8_t newDepth) { | 692 | static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth, u8 newDepth) { |
| 710 | assert(newDepth <= 8); | 693 | assert(newDepth <= 8); |
| 711 | assert(oldDepth <= 8); | 694 | assert(oldDepth <= 8); |
| 712 | 695 | ||
| @@ -722,16 +705,15 @@ public: | |||
| 722 | if (newDepth == 0) { | 705 | if (newDepth == 0) { |
| 723 | return 0xFF; | 706 | return 0xFF; |
| 724 | } else { | 707 | } else { |
| 725 | uint8_t bitsWasted = static_cast<uint8_t>(oldDepth - newDepth); | 708 | u8 bitsWasted = static_cast<u8>(oldDepth - newDepth); |
| 726 | uint16_t v = static_cast<uint16_t>(val); | 709 | u16 v = static_cast<u16>(val); |
| 727 | v = static_cast<uint16_t>((v + (1 << (bitsWasted - 1))) >> bitsWasted); | 710 | v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted); |
| 728 | v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), | 711 | v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << newDepth) - 1)); |
| 729 | static_cast<uint16_t>((1 << newDepth) - 1)); | 712 | return static_cast<u8>(v); |
| 730 | return static_cast<uint8_t>(v); | ||
| 731 | } | 713 | } |
| 732 | } | 714 | } |
| 733 | 715 | ||
| 734 | assert(!"We shouldn't get here."); | 716 | assert(false && "We shouldn't get here."); |
| 735 | return 0; | 717 | return 0; |
| 736 | } | 718 | } |
| 737 | 719 | ||
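ChangeBitDepth's narrowing path above rounds to nearest before clamping into the smaller range; the patch reflows the clamp onto one line and switches to the u8/u16 aliases. A minimal standalone sketch of that quantization step, with a worked example:

    #include <algorithm>
    #include <cstdint>

    // Requantize a channel from oldDepth to newDepth bits (oldDepth > newDepth > 0)
    // with round-to-nearest, e.g. Requantize(200, 8, 5) == 25, since 200/255 ~ 25/31.
    static uint8_t Requantize(uint16_t val, uint32_t oldDepth, uint32_t newDepth) {
        const uint32_t wasted = oldDepth - newDepth;
        uint32_t v = (val + (1u << (wasted - 1))) >> wasted; // round to nearest
        v = std::min<uint32_t>(v, (1u << newDepth) - 1);     // clamp to the new maximum
        return static_cast<uint8_t>(v);
    }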
| @@ -759,15 +741,15 @@ public: | |||
| 759 | ChannelType& B() { | 741 | ChannelType& B() { |
| 760 | return color[3]; | 742 | return color[3]; |
| 761 | } | 743 | } |
| 762 | const ChannelType& Component(uint32_t idx) const { | 744 | const ChannelType& Component(u32 idx) const { |
| 763 | return color[idx]; | 745 | return color[idx]; |
| 764 | } | 746 | } |
| 765 | ChannelType& Component(uint32_t idx) { | 747 | ChannelType& Component(u32 idx) { |
| 766 | return color[idx]; | 748 | return color[idx]; |
| 767 | } | 749 | } |
| 768 | 750 | ||
| 769 | void GetBitDepth(uint8_t (&outDepth)[4]) const { | 751 | void GetBitDepth(u8 (&outDepth)[4]) const { |
| 770 | for (int i = 0; i < 4; i++) { | 752 | for (s32 i = 0; i < 4; i++) { |
| 771 | outDepth[i] = m_BitDepth[i]; | 753 | outDepth[i] = m_BitDepth[i]; |
| 772 | } | 754 | } |
| 773 | } | 755 | } |
| @@ -776,12 +758,12 @@ public: | |||
| 776 | // and then pack each channel into an R8G8B8A8 32-bit integer. We assume | 758 | // and then pack each channel into an R8G8B8A8 32-bit integer. We assume |
| 777 | // that the architecture is little-endian, so the alpha channel will end | 759 | // that the architecture is little-endian, so the alpha channel will end |
| 778 | // up in the most-significant byte. | 760 | // up in the most-significant byte. |
| 779 | uint32_t Pack() const { | 761 | u32 Pack() const { |
| 780 | Pixel eightBit(*this); | 762 | Pixel eightBit(*this); |
| 781 | const uint8_t eightBitDepth[4] = {8, 8, 8, 8}; | 763 | const u8 eightBitDepth[4] = {8, 8, 8, 8}; |
| 782 | eightBit.ChangeBitDepth(eightBitDepth); | 764 | eightBit.ChangeBitDepth(eightBitDepth); |
| 783 | 765 | ||
| 784 | uint32_t r = 0; | 766 | u32 r = 0; |
| 785 | r |= eightBit.A(); | 767 | r |= eightBit.A(); |
| 786 | r <<= 8; | 768 | r <<= 8; |
| 787 | r |= eightBit.B(); | 769 | r |= eightBit.B(); |
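Pixel::Pack above first forces every channel to 8 bits and then shifts the channels together so that, on a little-endian target, the bytes land in memory as R, G, B, A with alpha in the most significant byte of the word. The equivalent packing written directly against standard types:

    #include <cstdint>

    // Pack four 8-bit channels into one word, matching Pixel::Pack:
    // (a << 24) | (b << 16) | (g << 8) | r, i.e. R8G8B8A8 in little-endian memory order.
    static uint32_t PackRGBA8(uint8_t r, uint8_t g, uint8_t b, uint8_t a) {
        return static_cast<uint32_t>(r) |
               (static_cast<uint32_t>(g) << 8) |
               (static_cast<uint32_t>(b) << 16) |
               (static_cast<uint32_t>(a) << 24);
    }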
| @@ -794,7 +776,7 @@ public: | |||
| 794 | 776 | ||
| 795 | // Clamps the pixel to the range [0,255] | 777 | // Clamps the pixel to the range [0,255] |
| 796 | void ClampByte() { | 778 | void ClampByte() { |
| 797 | for (uint32_t i = 0; i < 4; i++) { | 779 | for (u32 i = 0; i < 4; i++) { |
| 798 | color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]); | 780 | color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]); |
| 799 | } | 781 | } |
| 800 | } | 782 | } |
| @@ -804,24 +786,24 @@ public: | |||
| 804 | } | 786 | } |
| 805 | }; | 787 | }; |
| 806 | 788 | ||
| 807 | static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* modes, | 789 | static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nPartitions, |
| 808 | const uint32_t nPartitions, const uint32_t nBitsForColorData) { | 790 | const u32 nBitsForColorData) { |
| 809 | // First figure out how many color values we have | 791 | // First figure out how many color values we have |
| 810 | uint32_t nValues = 0; | 792 | u32 nValues = 0; |
| 811 | for (uint32_t i = 0; i < nPartitions; i++) { | 793 | for (u32 i = 0; i < nPartitions; i++) { |
| 812 | nValues += ((modes[i] >> 2) + 1) << 1; | 794 | nValues += ((modes[i] >> 2) + 1) << 1; |
| 813 | } | 795 | } |
| 814 | 796 | ||
| 815 | // Then based on the number of values and the remaining number of bits, | 797 | // Then based on the number of values and the remaining number of bits, |
| 816 | // figure out the max value for each of them... | 798 | // figure out the max value for each of them... |
| 817 | uint32_t range = 256; | 799 | u32 range = 256; |
| 818 | while (--range > 0) { | 800 | while (--range > 0) { |
| 819 | IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(range); | 801 | IntegerEncodedValue val = EncodingsValues[range]; |
| 820 | uint32_t bitLength = val.GetBitLength(nValues); | 802 | u32 bitLength = val.GetBitLength(nValues); |
| 821 | if (bitLength <= nBitsForColorData) { | 803 | if (bitLength <= nBitsForColorData) { |
| 822 | // Find the smallest possible range that matches the given encoding | 804 | // Find the smallest possible range that matches the given encoding |
| 823 | while (--range > 0) { | 805 | while (--range > 0) { |
| 824 | IntegerEncodedValue newval = IntegerEncodedValue::CreateEncoding(range); | 806 | IntegerEncodedValue newval = EncodingsValues[range]; |
| 825 | if (!newval.MatchesEncoding(val)) { | 807 | if (!newval.MatchesEncoding(val)) { |
| 826 | break; | 808 | break; |
| 827 | } | 809 | } |
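The loop above walks range down from 255 until the integer sequence encoding of nValues values fits in the available color bits, then keeps shrinking while the encoding class stays the same; the patch swaps the CreateEncoding call for a lookup into a precomputed EncodingsValues table. The bit-count rule behind GetBitLength is the one from ASTC spec C.2.12; a standalone sketch, assuming the usual packing of five trits into eight bits and three quints into seven bits:

    #include <cstdint>

    // Bits needed to store nVals values as nBits plain bits each plus an
    // optional trit or quint digit per value (ASTC integer sequence encoding).
    static uint32_t IseBitLength(uint32_t nVals, uint32_t nBits, bool trit, bool quint) {
        uint32_t total = nVals * nBits;
        if (trit)
            total += (nVals * 8 + 4) / 5; // ceil(nVals * 8 / 5)
        if (quint)
            total += (nVals * 7 + 2) / 3; // ceil(nVals * 7 / 3)
        return total;
    }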
| @@ -835,12 +817,14 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode | |||
| 835 | 817 | ||
| 836 | // We now have enough to decode our integer sequence. | 818 | // We now have enough to decode our integer sequence. |
| 837 | std::vector<IntegerEncodedValue> decodedColorValues; | 819 | std::vector<IntegerEncodedValue> decodedColorValues; |
| 820 | decodedColorValues.reserve(32); | ||
| 821 | |||
| 838 | InputBitStream colorStream(data); | 822 | InputBitStream colorStream(data); |
| 839 | IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); | 823 | DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); |
| 840 | 824 | ||
| 841 | // Once we have the decoded values, we need to dequantize them to the 0-255 range | 825 | // Once we have the decoded values, we need to dequantize them to the 0-255 range |
| 842 | // This procedure is outlined in ASTC spec C.2.13 | 826 | // This procedure is outlined in ASTC spec C.2.13 |
| 843 | uint32_t outIdx = 0; | 827 | u32 outIdx = 0; |
| 844 | for (auto itr = decodedColorValues.begin(); itr != decodedColorValues.end(); ++itr) { | 828 | for (auto itr = decodedColorValues.begin(); itr != decodedColorValues.end(); ++itr) { |
| 845 | // Have we already decoded all that we need? | 829 | // Have we already decoded all that we need? |
| 846 | if (outIdx >= nValues) { | 830 | if (outIdx >= nValues) { |
| @@ -848,25 +832,25 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode | |||
| 848 | } | 832 | } |
| 849 | 833 | ||
| 850 | const IntegerEncodedValue& val = *itr; | 834 | const IntegerEncodedValue& val = *itr; |
| 851 | uint32_t bitlen = val.BaseBitLength(); | 835 | u32 bitlen = val.num_bits; |
| 852 | uint32_t bitval = val.GetBitValue(); | 836 | u32 bitval = val.bit_value; |
| 853 | 837 | ||
| 854 | assert(bitlen >= 1); | 838 | assert(bitlen >= 1); |
| 855 | 839 | ||
| 856 | uint32_t A = 0, B = 0, C = 0, D = 0; | 840 | u32 A = 0, B = 0, C = 0, D = 0; |
| 857 | // A is just the lsb replicated 9 times. | 841 | // A is just the lsb replicated 9 times. |
| 858 | A = Replicate(bitval & 1, 1, 9); | 842 | A = Replicate(bitval & 1, 1, 9); |
| 859 | 843 | ||
| 860 | switch (val.GetEncoding()) { | 844 | switch (val.encoding) { |
| 861 | // Replicate bits | 845 | // Replicate bits |
| 862 | case eIntegerEncoding_JustBits: | 846 | case IntegerEncoding::JustBits: |
| 863 | out[outIdx++] = Replicate(bitval, bitlen, 8); | 847 | out[outIdx++] = Replicate(bitval, bitlen, 8); |
| 864 | break; | 848 | break; |
| 865 | 849 | ||
| 866 | // Use algorithm in C.2.13 | 850 | // Use algorithm in C.2.13 |
| 867 | case eIntegerEncoding_Trit: { | 851 | case IntegerEncoding::Trit: { |
| 868 | 852 | ||
| 869 | D = val.GetTritValue(); | 853 | D = val.trit_value; |
| 870 | 854 | ||
| 871 | switch (bitlen) { | 855 | switch (bitlen) { |
| 872 | case 1: { | 856 | case 1: { |
| @@ -876,48 +860,48 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode | |||
| 876 | case 2: { | 860 | case 2: { |
| 877 | C = 93; | 861 | C = 93; |
| 878 | // B = b000b0bb0 | 862 | // B = b000b0bb0 |
| 879 | uint32_t b = (bitval >> 1) & 1; | 863 | u32 b = (bitval >> 1) & 1; |
| 880 | B = (b << 8) | (b << 4) | (b << 2) | (b << 1); | 864 | B = (b << 8) | (b << 4) | (b << 2) | (b << 1); |
| 881 | } break; | 865 | } break; |
| 882 | 866 | ||
| 883 | case 3: { | 867 | case 3: { |
| 884 | C = 44; | 868 | C = 44; |
| 885 | // B = cb000cbcb | 869 | // B = cb000cbcb |
| 886 | uint32_t cb = (bitval >> 1) & 3; | 870 | u32 cb = (bitval >> 1) & 3; |
| 887 | B = (cb << 7) | (cb << 2) | cb; | 871 | B = (cb << 7) | (cb << 2) | cb; |
| 888 | } break; | 872 | } break; |
| 889 | 873 | ||
| 890 | case 4: { | 874 | case 4: { |
| 891 | C = 22; | 875 | C = 22; |
| 892 | // B = dcb000dcb | 876 | // B = dcb000dcb |
| 893 | uint32_t dcb = (bitval >> 1) & 7; | 877 | u32 dcb = (bitval >> 1) & 7; |
| 894 | B = (dcb << 6) | dcb; | 878 | B = (dcb << 6) | dcb; |
| 895 | } break; | 879 | } break; |
| 896 | 880 | ||
| 897 | case 5: { | 881 | case 5: { |
| 898 | C = 11; | 882 | C = 11; |
| 899 | // B = edcb000ed | 883 | // B = edcb000ed |
| 900 | uint32_t edcb = (bitval >> 1) & 0xF; | 884 | u32 edcb = (bitval >> 1) & 0xF; |
| 901 | B = (edcb << 5) | (edcb >> 2); | 885 | B = (edcb << 5) | (edcb >> 2); |
| 902 | } break; | 886 | } break; |
| 903 | 887 | ||
| 904 | case 6: { | 888 | case 6: { |
| 905 | C = 5; | 889 | C = 5; |
| 906 | // B = fedcb000f | 890 | // B = fedcb000f |
| 907 | uint32_t fedcb = (bitval >> 1) & 0x1F; | 891 | u32 fedcb = (bitval >> 1) & 0x1F; |
| 908 | B = (fedcb << 4) | (fedcb >> 4); | 892 | B = (fedcb << 4) | (fedcb >> 4); |
| 909 | } break; | 893 | } break; |
| 910 | 894 | ||
| 911 | default: | 895 | default: |
| 912 | assert(!"Unsupported trit encoding for color values!"); | 896 | assert(false && "Unsupported trit encoding for color values!"); |
| 913 | break; | 897 | break; |
| 914 | } // switch(bitlen) | 898 | } // switch(bitlen) |
| 915 | } // case eIntegerEncoding_Trit | 899 | } // case IntegerEncoding::Trit |
| 916 | break; | 900 | break; |
| 917 | 901 | ||
| 918 | case eIntegerEncoding_Quint: { | 902 | case IntegerEncoding::Qus32: { |
| 919 | 903 | ||
| 920 | D = val.GetQuintValue(); | 904 | D = val.qus32_value; |
| 921 | 905 | ||
| 922 | switch (bitlen) { | 906 | switch (bitlen) { |
| 923 | case 1: { | 907 | case 1: { |
| @@ -927,41 +911,41 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode | |||
| 927 | case 2: { | 911 | case 2: { |
| 928 | C = 54; | 912 | C = 54; |
| 929 | // B = b0000bb00 | 913 | // B = b0000bb00 |
| 930 | uint32_t b = (bitval >> 1) & 1; | 914 | u32 b = (bitval >> 1) & 1; |
| 931 | B = (b << 8) | (b << 3) | (b << 2); | 915 | B = (b << 8) | (b << 3) | (b << 2); |
| 932 | } break; | 916 | } break; |
| 933 | 917 | ||
| 934 | case 3: { | 918 | case 3: { |
| 935 | C = 26; | 919 | C = 26; |
| 936 | // B = cb0000cbc | 920 | // B = cb0000cbc |
| 937 | uint32_t cb = (bitval >> 1) & 3; | 921 | u32 cb = (bitval >> 1) & 3; |
| 938 | B = (cb << 7) | (cb << 1) | (cb >> 1); | 922 | B = (cb << 7) | (cb << 1) | (cb >> 1); |
| 939 | } break; | 923 | } break; |
| 940 | 924 | ||
| 941 | case 4: { | 925 | case 4: { |
| 942 | C = 13; | 926 | C = 13; |
| 943 | // B = dcb0000dc | 927 | // B = dcb0000dc |
| 944 | uint32_t dcb = (bitval >> 1) & 7; | 928 | u32 dcb = (bitval >> 1) & 7; |
| 945 | B = (dcb << 6) | (dcb >> 1); | 929 | B = (dcb << 6) | (dcb >> 1); |
| 946 | } break; | 930 | } break; |
| 947 | 931 | ||
| 948 | case 5: { | 932 | case 5: { |
| 949 | C = 6; | 933 | C = 6; |
| 950 | // B = edcb0000e | 934 | // B = edcb0000e |
| 951 | uint32_t edcb = (bitval >> 1) & 0xF; | 935 | u32 edcb = (bitval >> 1) & 0xF; |
| 952 | B = (edcb << 5) | (edcb >> 3); | 936 | B = (edcb << 5) | (edcb >> 3); |
| 953 | } break; | 937 | } break; |
| 954 | 938 | ||
| 955 | default: | 939 | default: |
| 956 | assert(!"Unsupported quint encoding for color values!"); | 940 | assert(false && "Unsupported quint encoding for color values!"); |
| 957 | break; | 941 | break; |
| 958 | } // switch(bitlen) | 942 | } // switch(bitlen) |
| 959 | } // case eIntegerEncoding_Quint | 943 | } // case IntegerEncoding::Qus32 |
| 960 | break; | 944 | break; |
| 961 | } // switch(val.GetEncoding()) | 945 | } // switch(val.encoding) |
| 962 | 946 | ||
| 963 | if (val.GetEncoding() != eIntegerEncoding_JustBits) { | 947 | if (val.encoding != IntegerEncoding::JustBits) { |
| 964 | uint32_t T = D * C + B; | 948 | u32 T = D * C + B; |
| 965 | T ^= A; | 949 | T ^= A; |
| 966 | T = (A & 0x80) | (T >> 2); | 950 | T = (A & 0x80) | (T >> 2); |
| 967 | out[outIdx++] = T; | 951 | out[outIdx++] = T; |
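For trit and quint encodings the hunk ends by folding the pieces together exactly as in spec C.2.13: A is the lsb replicated to nine bits, B is a pattern built from the remaining bits, C is the per-bit-length multiplier, and D is the trit or quint digit. The closing arithmetic, isolated for reference (the same shape reappears below in UnquantizeTexelWeight with smaller constants):

    #include <cstdint>

    // Final step of ASTC color dequantization: combine D, C and B, then fold in
    // A so the result lands in [0, 255].
    static uint32_t FinishColorUnquant(uint32_t A, uint32_t B, uint32_t C, uint32_t D) {
        uint32_t T = D * C + B;
        T ^= A;
        return (A & 0x80) | (T >> 2);
    }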
| @@ -969,31 +953,31 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode | |||
| 969 | } | 953 | } |
| 970 | 954 | ||
| 971 | // Make sure that each of our values is in the proper range... | 955 | // Make sure that each of our values is in the proper range... |
| 972 | for (uint32_t i = 0; i < nValues; i++) { | 956 | for (u32 i = 0; i < nValues; i++) { |
| 973 | assert(out[i] <= 255); | 957 | assert(out[i] <= 255); |
| 974 | } | 958 | } |
| 975 | } | 959 | } |
| 976 | 960 | ||
| 977 | static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { | 961 | static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { |
| 978 | uint32_t bitval = val.GetBitValue(); | 962 | u32 bitval = val.bit_value; |
| 979 | uint32_t bitlen = val.BaseBitLength(); | 963 | u32 bitlen = val.num_bits; |
| 980 | 964 | ||
| 981 | uint32_t A = Replicate(bitval & 1, 1, 7); | 965 | u32 A = Replicate(bitval & 1, 1, 7); |
| 982 | uint32_t B = 0, C = 0, D = 0; | 966 | u32 B = 0, C = 0, D = 0; |
| 983 | 967 | ||
| 984 | uint32_t result = 0; | 968 | u32 result = 0; |
| 985 | switch (val.GetEncoding()) { | 969 | switch (val.encoding) { |
| 986 | case eIntegerEncoding_JustBits: | 970 | case IntegerEncoding::JustBits: |
| 987 | result = Replicate(bitval, bitlen, 6); | 971 | result = Replicate(bitval, bitlen, 6); |
| 988 | break; | 972 | break; |
| 989 | 973 | ||
| 990 | case eIntegerEncoding_Trit: { | 974 | case IntegerEncoding::Trit: { |
| 991 | D = val.GetTritValue(); | 975 | D = val.trit_value; |
| 992 | assert(D < 3); | 976 | assert(D < 3); |
| 993 | 977 | ||
| 994 | switch (bitlen) { | 978 | switch (bitlen) { |
| 995 | case 0: { | 979 | case 0: { |
| 996 | uint32_t results[3] = {0, 32, 63}; | 980 | u32 results[3] = {0, 32, 63}; |
| 997 | result = results[D]; | 981 | result = results[D]; |
| 998 | } break; | 982 | } break; |
| 999 | 983 | ||
| @@ -1003,29 +987,29 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { | |||
| 1003 | 987 | ||
| 1004 | case 2: { | 988 | case 2: { |
| 1005 | C = 23; | 989 | C = 23; |
| 1006 | uint32_t b = (bitval >> 1) & 1; | 990 | u32 b = (bitval >> 1) & 1; |
| 1007 | B = (b << 6) | (b << 2) | b; | 991 | B = (b << 6) | (b << 2) | b; |
| 1008 | } break; | 992 | } break; |
| 1009 | 993 | ||
| 1010 | case 3: { | 994 | case 3: { |
| 1011 | C = 11; | 995 | C = 11; |
| 1012 | uint32_t cb = (bitval >> 1) & 3; | 996 | u32 cb = (bitval >> 1) & 3; |
| 1013 | B = (cb << 5) | cb; | 997 | B = (cb << 5) | cb; |
| 1014 | } break; | 998 | } break; |
| 1015 | 999 | ||
| 1016 | default: | 1000 | default: |
| 1017 | assert(!"Invalid trit encoding for texel weight"); | 1001 | assert(false && "Invalid trit encoding for texel weight"); |
| 1018 | break; | 1002 | break; |
| 1019 | } | 1003 | } |
| 1020 | } break; | 1004 | } break; |
| 1021 | 1005 | ||
| 1022 | case eIntegerEncoding_Quint: { | 1006 | case IntegerEncoding::Qus32: { |
| 1023 | D = val.GetQuintValue(); | 1007 | D = val.qus32_value; |
| 1024 | assert(D < 5); | 1008 | assert(D < 5); |
| 1025 | 1009 | ||
| 1026 | switch (bitlen) { | 1010 | switch (bitlen) { |
| 1027 | case 0: { | 1011 | case 0: { |
| 1028 | uint32_t results[5] = {0, 16, 32, 47, 63}; | 1012 | u32 results[5] = {0, 16, 32, 47, 63}; |
| 1029 | result = results[D]; | 1013 | result = results[D]; |
| 1030 | } break; | 1014 | } break; |
| 1031 | 1015 | ||
| @@ -1035,18 +1019,18 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { | |||
| 1035 | 1019 | ||
| 1036 | case 2: { | 1020 | case 2: { |
| 1037 | C = 13; | 1021 | C = 13; |
| 1038 | uint32_t b = (bitval >> 1) & 1; | 1022 | u32 b = (bitval >> 1) & 1; |
| 1039 | B = (b << 6) | (b << 1); | 1023 | B = (b << 6) | (b << 1); |
| 1040 | } break; | 1024 | } break; |
| 1041 | 1025 | ||
| 1042 | default: | 1026 | default: |
| 1043 | assert(!"Invalid quint encoding for texel weight"); | 1027 | assert(false && "Invalid quint encoding for texel weight"); |
| 1044 | break; | 1028 | break; |
| 1045 | } | 1029 | } |
| 1046 | } break; | 1030 | } break; |
| 1047 | } | 1031 | } |
| 1048 | 1032 | ||
| 1049 | if (val.GetEncoding() != eIntegerEncoding_JustBits && bitlen > 0) { | 1033 | if (val.encoding != IntegerEncoding::JustBits && bitlen > 0) { |
| 1050 | // Decode the value... | 1034 | // Decode the value... |
| 1051 | result = D * C + B; | 1035 | result = D * C + B; |
| 1052 | result ^= A; | 1036 | result ^= A; |
| @@ -1063,12 +1047,11 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { | |||
| 1063 | return result; | 1047 | return result; |
| 1064 | } | 1048 | } |
| 1065 | 1049 | ||
| 1066 | static void UnquantizeTexelWeights(uint32_t out[2][144], | 1050 | static void UnquantizeTexelWeights(u32 out[2][144], const std::vector<IntegerEncodedValue>& weights, |
| 1067 | const std::vector<IntegerEncodedValue>& weights, | 1051 | const TexelWeightParams& params, const u32 blockWidth, |
| 1068 | const TexelWeightParams& params, const uint32_t blockWidth, | 1052 | const u32 blockHeight) { |
| 1069 | const uint32_t blockHeight) { | 1053 | u32 weightIdx = 0; |
| 1070 | uint32_t weightIdx = 0; | 1054 | u32 unquantized[2][144]; |
| 1071 | uint32_t unquantized[2][144]; | ||
| 1072 | 1055 | ||
| 1073 | for (auto itr = weights.begin(); itr != weights.end(); ++itr) { | 1056 | for (auto itr = weights.begin(); itr != weights.end(); ++itr) { |
| 1074 | unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr); | 1057 | unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr); |
| @@ -1086,34 +1069,34 @@ static void UnquantizeTexelWeights(uint32_t out[2][144], | |||
| 1086 | } | 1069 | } |
| 1087 | 1070 | ||
| 1088 | // Do infill if necessary (Section C.2.18) ... | 1071 | // Do infill if necessary (Section C.2.18) ... |
| 1089 | uint32_t Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1); | 1072 | u32 Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1); |
| 1090 | uint32_t Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1); | 1073 | u32 Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1); |
| 1091 | 1074 | ||
| 1092 | const uint32_t kPlaneScale = params.m_bDualPlane ? 2U : 1U; | 1075 | const u32 kPlaneScale = params.m_bDualPlane ? 2U : 1U; |
| 1093 | for (uint32_t plane = 0; plane < kPlaneScale; plane++) | 1076 | for (u32 plane = 0; plane < kPlaneScale; plane++) |
| 1094 | for (uint32_t t = 0; t < blockHeight; t++) | 1077 | for (u32 t = 0; t < blockHeight; t++) |
| 1095 | for (uint32_t s = 0; s < blockWidth; s++) { | 1078 | for (u32 s = 0; s < blockWidth; s++) { |
| 1096 | uint32_t cs = Ds * s; | 1079 | u32 cs = Ds * s; |
| 1097 | uint32_t ct = Dt * t; | 1080 | u32 ct = Dt * t; |
| 1098 | 1081 | ||
| 1099 | uint32_t gs = (cs * (params.m_Width - 1) + 32) >> 6; | 1082 | u32 gs = (cs * (params.m_Width - 1) + 32) >> 6; |
| 1100 | uint32_t gt = (ct * (params.m_Height - 1) + 32) >> 6; | 1083 | u32 gt = (ct * (params.m_Height - 1) + 32) >> 6; |
| 1101 | 1084 | ||
| 1102 | uint32_t js = gs >> 4; | 1085 | u32 js = gs >> 4; |
| 1103 | uint32_t fs = gs & 0xF; | 1086 | u32 fs = gs & 0xF; |
| 1104 | 1087 | ||
| 1105 | uint32_t jt = gt >> 4; | 1088 | u32 jt = gt >> 4; |
| 1106 | uint32_t ft = gt & 0x0F; | 1089 | u32 ft = gt & 0x0F; |
| 1107 | 1090 | ||
| 1108 | uint32_t w11 = (fs * ft + 8) >> 4; | 1091 | u32 w11 = (fs * ft + 8) >> 4; |
| 1109 | uint32_t w10 = ft - w11; | 1092 | u32 w10 = ft - w11; |
| 1110 | uint32_t w01 = fs - w11; | 1093 | u32 w01 = fs - w11; |
| 1111 | uint32_t w00 = 16 - fs - ft + w11; | 1094 | u32 w00 = 16 - fs - ft + w11; |
| 1112 | 1095 | ||
| 1113 | uint32_t v0 = js + jt * params.m_Width; | 1096 | u32 v0 = js + jt * params.m_Width; |
| 1114 | 1097 | ||
| 1115 | #define FIND_TEXEL(tidx, bidx) \ | 1098 | #define FIND_TEXEL(tidx, bidx) \ |
| 1116 | uint32_t p##bidx = 0; \ | 1099 | u32 p##bidx = 0; \ |
| 1117 | do { \ | 1100 | do { \ |
| 1118 | if ((tidx) < (params.m_Width * params.m_Height)) { \ | 1101 | if ((tidx) < (params.m_Width * params.m_Height)) { \ |
| 1119 | p##bidx = unquantized[plane][(tidx)]; \ | 1102 | p##bidx = unquantized[plane][(tidx)]; \ |
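The infill block above maps each output texel back onto the stored weight grid and blends the four neighbouring weights; the FIND_TEXEL macro only guards out-of-range reads. The weighted sum itself sits in a part of the function not shown in this hunk, so the sketch below follows the spec formula (C.2.18) rather than quoting the file:

    #include <cstdint>

    // Bilinear infill of one decoded weight: fs, ft are the 4-bit fractional
    // positions inside the weight grid, p00..p11 the four surrounding grid
    // weights. The four factors always sum to 16.
    static uint32_t InfillWeight(uint32_t fs, uint32_t ft,
                                 uint32_t p00, uint32_t p01, uint32_t p10, uint32_t p11) {
        const uint32_t w11 = (fs * ft + 8) >> 4;
        const uint32_t w10 = ft - w11;
        const uint32_t w01 = fs - w11;
        const uint32_t w00 = 16 - fs - ft + w11;
        return (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >> 4;
    }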
| @@ -1133,7 +1116,7 @@ static void UnquantizeTexelWeights(uint32_t out[2][144], | |||
| 1133 | } | 1116 | } |
| 1134 | 1117 | ||
| 1135 | // Transfers a bit as described in C.2.14 | 1118 | // Transfers a bit as described in C.2.14 |
| 1136 | static inline void BitTransferSigned(int32_t& a, int32_t& b) { | 1119 | static inline void BitTransferSigned(s32& a, s32& b) { |
| 1137 | b >>= 1; | 1120 | b >>= 1; |
| 1138 | b |= a & 0x80; | 1121 | b |= a & 0x80; |
| 1139 | a >>= 1; | 1122 | a >>= 1; |
| @@ -1144,14 +1127,14 @@ static inline void BitTransferSigned(int32_t& a, int32_t& b) { | |||
| 1144 | 1127 | ||
| 1145 | // Adds more precision to the blue channel as described | 1128 | // Adds more precision to the blue channel as described |
| 1146 | // in C.2.14 | 1129 | // in C.2.14 |
| 1147 | static inline Pixel BlueContract(int32_t a, int32_t r, int32_t g, int32_t b) { | 1130 | static inline Pixel BlueContract(s32 a, s32 r, s32 g, s32 b) { |
| 1148 | return Pixel(static_cast<int16_t>(a), static_cast<int16_t>((r + b) >> 1), | 1131 | return Pixel(static_cast<s16>(a), static_cast<s16>((r + b) >> 1), |
| 1149 | static_cast<int16_t>((g + b) >> 1), static_cast<int16_t>(b)); | 1132 | static_cast<s16>((g + b) >> 1), static_cast<s16>(b)); |
| 1150 | } | 1133 | } |
| 1151 | 1134 | ||
| 1152 | // Partition selection functions as specified in | 1135 | // Partition selection functions as specified in |
| 1153 | // C.2.21 | 1136 | // C.2.21 |
| 1154 | static inline uint32_t hash52(uint32_t p) { | 1137 | static inline u32 hash52(u32 p) { |
| 1155 | p ^= p >> 15; | 1138 | p ^= p >> 15; |
| 1156 | p -= p << 17; | 1139 | p -= p << 17; |
| 1157 | p += p << 7; | 1140 | p += p << 7; |
| @@ -1165,8 +1148,7 @@ static inline uint32_t hash52(uint32_t p) { | |||
| 1165 | return p; | 1148 | return p; |
| 1166 | } | 1149 | } |
| 1167 | 1150 | ||
| 1168 | static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, | 1151 | static u32 SelectPartition(s32 seed, s32 x, s32 y, s32 z, s32 partitionCount, s32 smallBlock) { |
| 1169 | int32_t partitionCount, int32_t smallBlock) { | ||
| 1170 | if (1 == partitionCount) | 1152 | if (1 == partitionCount) |
| 1171 | return 0; | 1153 | return 0; |
| 1172 | 1154 | ||
| @@ -1178,34 +1160,34 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, | |||
| 1178 | 1160 | ||
| 1179 | seed += (partitionCount - 1) * 1024; | 1161 | seed += (partitionCount - 1) * 1024; |
| 1180 | 1162 | ||
| 1181 | uint32_t rnum = hash52(static_cast<uint32_t>(seed)); | 1163 | u32 rnum = hash52(static_cast<u32>(seed)); |
| 1182 | uint8_t seed1 = static_cast<uint8_t>(rnum & 0xF); | 1164 | u8 seed1 = static_cast<u8>(rnum & 0xF); |
| 1183 | uint8_t seed2 = static_cast<uint8_t>((rnum >> 4) & 0xF); | 1165 | u8 seed2 = static_cast<u8>((rnum >> 4) & 0xF); |
| 1184 | uint8_t seed3 = static_cast<uint8_t>((rnum >> 8) & 0xF); | 1166 | u8 seed3 = static_cast<u8>((rnum >> 8) & 0xF); |
| 1185 | uint8_t seed4 = static_cast<uint8_t>((rnum >> 12) & 0xF); | 1167 | u8 seed4 = static_cast<u8>((rnum >> 12) & 0xF); |
| 1186 | uint8_t seed5 = static_cast<uint8_t>((rnum >> 16) & 0xF); | 1168 | u8 seed5 = static_cast<u8>((rnum >> 16) & 0xF); |
| 1187 | uint8_t seed6 = static_cast<uint8_t>((rnum >> 20) & 0xF); | 1169 | u8 seed6 = static_cast<u8>((rnum >> 20) & 0xF); |
| 1188 | uint8_t seed7 = static_cast<uint8_t>((rnum >> 24) & 0xF); | 1170 | u8 seed7 = static_cast<u8>((rnum >> 24) & 0xF); |
| 1189 | uint8_t seed8 = static_cast<uint8_t>((rnum >> 28) & 0xF); | 1171 | u8 seed8 = static_cast<u8>((rnum >> 28) & 0xF); |
| 1190 | uint8_t seed9 = static_cast<uint8_t>((rnum >> 18) & 0xF); | 1172 | u8 seed9 = static_cast<u8>((rnum >> 18) & 0xF); |
| 1191 | uint8_t seed10 = static_cast<uint8_t>((rnum >> 22) & 0xF); | 1173 | u8 seed10 = static_cast<u8>((rnum >> 22) & 0xF); |
| 1192 | uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF); | 1174 | u8 seed11 = static_cast<u8>((rnum >> 26) & 0xF); |
| 1193 | uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF); | 1175 | u8 seed12 = static_cast<u8>(((rnum >> 30) | (rnum << 2)) & 0xF); |
| 1194 | 1176 | ||
| 1195 | seed1 = static_cast<uint8_t>(seed1 * seed1); | 1177 | seed1 = static_cast<u8>(seed1 * seed1); |
| 1196 | seed2 = static_cast<uint8_t>(seed2 * seed2); | 1178 | seed2 = static_cast<u8>(seed2 * seed2); |
| 1197 | seed3 = static_cast<uint8_t>(seed3 * seed3); | 1179 | seed3 = static_cast<u8>(seed3 * seed3); |
| 1198 | seed4 = static_cast<uint8_t>(seed4 * seed4); | 1180 | seed4 = static_cast<u8>(seed4 * seed4); |
| 1199 | seed5 = static_cast<uint8_t>(seed5 * seed5); | 1181 | seed5 = static_cast<u8>(seed5 * seed5); |
| 1200 | seed6 = static_cast<uint8_t>(seed6 * seed6); | 1182 | seed6 = static_cast<u8>(seed6 * seed6); |
| 1201 | seed7 = static_cast<uint8_t>(seed7 * seed7); | 1183 | seed7 = static_cast<u8>(seed7 * seed7); |
| 1202 | seed8 = static_cast<uint8_t>(seed8 * seed8); | 1184 | seed8 = static_cast<u8>(seed8 * seed8); |
| 1203 | seed9 = static_cast<uint8_t>(seed9 * seed9); | 1185 | seed9 = static_cast<u8>(seed9 * seed9); |
| 1204 | seed10 = static_cast<uint8_t>(seed10 * seed10); | 1186 | seed10 = static_cast<u8>(seed10 * seed10); |
| 1205 | seed11 = static_cast<uint8_t>(seed11 * seed11); | 1187 | seed11 = static_cast<u8>(seed11 * seed11); |
| 1206 | seed12 = static_cast<uint8_t>(seed12 * seed12); | 1188 | seed12 = static_cast<u8>(seed12 * seed12); |
| 1207 | 1189 | ||
| 1208 | int32_t sh1, sh2, sh3; | 1190 | s32 sh1, sh2, sh3; |
| 1209 | if (seed & 1) { | 1191 | if (seed & 1) { |
| 1210 | sh1 = (seed & 2) ? 4 : 5; | 1192 | sh1 = (seed & 2) ? 4 : 5; |
| 1211 | sh2 = (partitionCount == 3) ? 6 : 5; | 1193 | sh2 = (partitionCount == 3) ? 6 : 5; |
| @@ -1215,23 +1197,23 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, | |||
| 1215 | } | 1197 | } |
| 1216 | sh3 = (seed & 0x10) ? sh1 : sh2; | 1198 | sh3 = (seed & 0x10) ? sh1 : sh2; |
| 1217 | 1199 | ||
| 1218 | seed1 = static_cast<uint8_t>(seed1 >> sh1); | 1200 | seed1 = static_cast<u8>(seed1 >> sh1); |
| 1219 | seed2 = static_cast<uint8_t>(seed2 >> sh2); | 1201 | seed2 = static_cast<u8>(seed2 >> sh2); |
| 1220 | seed3 = static_cast<uint8_t>(seed3 >> sh1); | 1202 | seed3 = static_cast<u8>(seed3 >> sh1); |
| 1221 | seed4 = static_cast<uint8_t>(seed4 >> sh2); | 1203 | seed4 = static_cast<u8>(seed4 >> sh2); |
| 1222 | seed5 = static_cast<uint8_t>(seed5 >> sh1); | 1204 | seed5 = static_cast<u8>(seed5 >> sh1); |
| 1223 | seed6 = static_cast<uint8_t>(seed6 >> sh2); | 1205 | seed6 = static_cast<u8>(seed6 >> sh2); |
| 1224 | seed7 = static_cast<uint8_t>(seed7 >> sh1); | 1206 | seed7 = static_cast<u8>(seed7 >> sh1); |
| 1225 | seed8 = static_cast<uint8_t>(seed8 >> sh2); | 1207 | seed8 = static_cast<u8>(seed8 >> sh2); |
| 1226 | seed9 = static_cast<uint8_t>(seed9 >> sh3); | 1208 | seed9 = static_cast<u8>(seed9 >> sh3); |
| 1227 | seed10 = static_cast<uint8_t>(seed10 >> sh3); | 1209 | seed10 = static_cast<u8>(seed10 >> sh3); |
| 1228 | seed11 = static_cast<uint8_t>(seed11 >> sh3); | 1210 | seed11 = static_cast<u8>(seed11 >> sh3); |
| 1229 | seed12 = static_cast<uint8_t>(seed12 >> sh3); | 1211 | seed12 = static_cast<u8>(seed12 >> sh3); |
| 1230 | 1212 | ||
| 1231 | int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); | 1213 | s32 a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); |
| 1232 | int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); | 1214 | s32 b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); |
| 1233 | int32_t c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); | 1215 | s32 c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); |
| 1234 | int32_t d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); | 1216 | s32 d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); |
| 1235 | 1217 | ||
| 1236 | a &= 0x3F; | 1218 | a &= 0x3F; |
| 1237 | b &= 0x3F; | 1219 | b &= 0x3F; |
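After the squared seeds are shifted down, a, b, c and d are masked to six bits and compared; per spec C.2.21 the selected partition is simply whichever of the four mixed values is largest, which is what the cascading returns that close this function express. A compact restatement of that rule (a sketch, not the file's code):

    #include <cstdint>

    // Partition selection, ASTC spec C.2.21: index of the largest of the four
    // 6-bit mixed values, ties resolved in favour of the lower index.
    static uint32_t PickPartition(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
        if (a >= b && a >= c && a >= d)
            return 0;
        if (b >= c && b >= d)
            return 1;
        if (c >= d)
            return 2;
        return 3;
    }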
| @@ -1252,27 +1234,26 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, | |||
| 1252 | return 3; | 1234 | return 3; |
| 1253 | } | 1235 | } |
| 1254 | 1236 | ||
| 1255 | static inline uint32_t Select2DPartition(int32_t seed, int32_t x, int32_t y, int32_t partitionCount, | 1237 | static inline u32 Select2DPartition(s32 seed, s32 x, s32 y, s32 partitionCount, s32 smallBlock) { |
| 1256 | int32_t smallBlock) { | ||
| 1257 | return SelectPartition(seed, x, y, 0, partitionCount, smallBlock); | 1238 | return SelectPartition(seed, x, y, 0, partitionCount, smallBlock); |
| 1258 | } | 1239 | } |
| 1259 | 1240 | ||
| 1260 | // Section C.2.14 | 1241 | // Section C.2.14 |
| 1261 | static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValues, | 1242 | static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues, |
| 1262 | uint32_t colorEndpointMode) { | 1243 | u32 colorEndpos32Mode) { |
| 1263 | #define READ_UINT_VALUES(N) \ | 1244 | #define READ_UINT_VALUES(N) \ |
| 1264 | uint32_t v[N]; \ | 1245 | u32 v[N]; \ |
| 1265 | for (uint32_t i = 0; i < N; i++) { \ | 1246 | for (u32 i = 0; i < N; i++) { \ |
| 1266 | v[i] = *(colorValues++); \ | 1247 | v[i] = *(colorValues++); \ |
| 1267 | } | 1248 | } |
| 1268 | 1249 | ||
| 1269 | #define READ_INT_VALUES(N) \ | 1250 | #define READ_INT_VALUES(N) \ |
| 1270 | int32_t v[N]; \ | 1251 | s32 v[N]; \ |
| 1271 | for (uint32_t i = 0; i < N; i++) { \ | 1252 | for (u32 i = 0; i < N; i++) { \ |
| 1272 | v[i] = static_cast<int32_t>(*(colorValues++)); \ | 1253 | v[i] = static_cast<s32>(*(colorValues++)); \ |
| 1273 | } | 1254 | } |
| 1274 | 1255 | ||
| 1275 | switch (colorEndpointMode) { | 1256 | switch (colorEndpos32Mode) { |
| 1276 | case 0: { | 1257 | case 0: { |
| 1277 | READ_UINT_VALUES(2) | 1258 | READ_UINT_VALUES(2) |
| 1278 | ep1 = Pixel(0xFF, v[0], v[0], v[0]); | 1259 | ep1 = Pixel(0xFF, v[0], v[0], v[0]); |
| @@ -1281,8 +1262,8 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue | |||
| 1281 | 1262 | ||
| 1282 | case 1: { | 1263 | case 1: { |
| 1283 | READ_UINT_VALUES(2) | 1264 | READ_UINT_VALUES(2) |
| 1284 | uint32_t L0 = (v[0] >> 2) | (v[1] & 0xC0); | 1265 | u32 L0 = (v[0] >> 2) | (v[1] & 0xC0); |
| 1285 | uint32_t L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU); | 1266 | u32 L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU); |
| 1286 | ep1 = Pixel(0xFF, L0, L0, L0); | 1267 | ep1 = Pixel(0xFF, L0, L0, L0); |
| 1287 | ep2 = Pixel(0xFF, L1, L1, L1); | 1268 | ep2 = Pixel(0xFF, L1, L1, L1); |
| 1288 | } break; | 1269 | } break; |
| @@ -1371,7 +1352,7 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue | |||
| 1371 | } break; | 1352 | } break; |
| 1372 | 1353 | ||
| 1373 | default: | 1354 | default: |
| 1374 | assert(!"Unsupported color endpoint mode (is it HDR?)"); | 1355 | assert(false && "Unsupported color endpoint mode (is it HDR?)"); |
| 1375 | break; | 1356 | break; |
| 1376 | } | 1357 | } |
| 1377 | 1358 | ||
| @@ -1379,14 +1360,14 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue | |||
| 1379 | #undef READ_INT_VALUES | 1360 | #undef READ_INT_VALUES |
| 1380 | } | 1361 | } |
| 1381 | 1362 | ||
| 1382 | static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | 1363 | static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight, |
| 1383 | const uint32_t blockHeight, uint32_t* outBuf) { | 1364 | u32* outBuf) { |
| 1384 | InputBitStream strm(inBuf); | 1365 | InputBitStream strm(inBuf); |
| 1385 | TexelWeightParams weightParams = DecodeBlockInfo(strm); | 1366 | TexelWeightParams weightParams = DecodeBlockInfo(strm); |
| 1386 | 1367 | ||
| 1387 | // Was there an error? | 1368 | // Was there an error? |
| 1388 | if (weightParams.m_bError) { | 1369 | if (weightParams.m_bError) { |
| 1389 | assert(!"Invalid block mode"); | 1370 | assert(false && "Invalid block mode"); |
| 1390 | FillError(outBuf, blockWidth, blockHeight); | 1371 | FillError(outBuf, blockWidth, blockHeight); |
| 1391 | return; | 1372 | return; |
| 1392 | } | 1373 | } |
| @@ -1397,63 +1378,63 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1397 | } | 1378 | } |
| 1398 | 1379 | ||
| 1399 | if (weightParams.m_bVoidExtentHDR) { | 1380 | if (weightParams.m_bVoidExtentHDR) { |
| 1400 | assert(!"HDR void extent blocks are unsupported!"); | 1381 | assert(false && "HDR void extent blocks are unsupported!"); |
| 1401 | FillError(outBuf, blockWidth, blockHeight); | 1382 | FillError(outBuf, blockWidth, blockHeight); |
| 1402 | return; | 1383 | return; |
| 1403 | } | 1384 | } |
| 1404 | 1385 | ||
| 1405 | if (weightParams.m_Width > blockWidth) { | 1386 | if (weightParams.m_Width > blockWidth) { |
| 1406 | assert(!"Texel weight grid width should be smaller than block width"); | 1387 | assert(false && "Texel weight grid width should be smaller than block width"); |
| 1407 | FillError(outBuf, blockWidth, blockHeight); | 1388 | FillError(outBuf, blockWidth, blockHeight); |
| 1408 | return; | 1389 | return; |
| 1409 | } | 1390 | } |
| 1410 | 1391 | ||
| 1411 | if (weightParams.m_Height > blockHeight) { | 1392 | if (weightParams.m_Height > blockHeight) { |
| 1412 | assert(!"Texel weight grid height should be smaller than block height"); | 1393 | assert(false && "Texel weight grid height should be smaller than block height"); |
| 1413 | FillError(outBuf, blockWidth, blockHeight); | 1394 | FillError(outBuf, blockWidth, blockHeight); |
| 1414 | return; | 1395 | return; |
| 1415 | } | 1396 | } |
| 1416 | 1397 | ||
| 1417 | // Read num partitions | 1398 | // Read num partitions |
| 1418 | uint32_t nPartitions = strm.ReadBits(2) + 1; | 1399 | u32 nPartitions = strm.ReadBits<2>() + 1; |
| 1419 | assert(nPartitions <= 4); | 1400 | assert(nPartitions <= 4); |
| 1420 | 1401 | ||
| 1421 | if (nPartitions == 4 && weightParams.m_bDualPlane) { | 1402 | if (nPartitions == 4 && weightParams.m_bDualPlane) { |
| 1422 | assert(!"Dual plane mode is incompatible with four partition blocks"); | 1403 | assert(false && "Dual plane mode is incompatible with four partition blocks"); |
| 1423 | FillError(outBuf, blockWidth, blockHeight); | 1404 | FillError(outBuf, blockWidth, blockHeight); |
| 1424 | return; | 1405 | return; |
| 1425 | } | 1406 | } |
| 1426 | 1407 | ||
| 1427 | // Based on the number of partitions, read the color endpoint mode for | 1408 | // Based on the number of partitions, read the color endpos32 mode for |
| 1428 | // each partition. | 1409 | // each partition. |
| 1429 | 1410 | ||
| 1430 | // Determine partitions, partition index, and color endpoint modes | 1411 | // Determine partitions, partition index, and color endpos32 modes |
| 1431 | int32_t planeIdx = -1; | 1412 | s32 planeIdx = -1; |
| 1432 | uint32_t partitionIndex; | 1413 | u32 partitionIndex; |
| 1433 | uint32_t colorEndpointMode[4] = {0, 0, 0, 0}; | 1414 | u32 colorEndpos32Mode[4] = {0, 0, 0, 0}; |
| 1434 | 1415 | ||
| 1435 | // Define color data. | 1416 | // Define color data. |
| 1436 | uint8_t colorEndpointData[16]; | 1417 | u8 colorEndpos32Data[16]; |
| 1437 | memset(colorEndpointData, 0, sizeof(colorEndpointData)); | 1418 | memset(colorEndpos32Data, 0, sizeof(colorEndpos32Data)); |
| 1438 | OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); | 1419 | OutputBitStream colorEndpos32Stream(colorEndpos32Data, 16 * 8, 0); |
| 1439 | 1420 | ||
| 1440 | // Read extra config data... | 1421 | // Read extra config data... |
| 1441 | uint32_t baseCEM = 0; | 1422 | u32 baseCEM = 0; |
| 1442 | if (nPartitions == 1) { | 1423 | if (nPartitions == 1) { |
| 1443 | colorEndpointMode[0] = strm.ReadBits(4); | 1424 | colorEndpos32Mode[0] = strm.ReadBits<4>(); |
| 1444 | partitionIndex = 0; | 1425 | partitionIndex = 0; |
| 1445 | } else { | 1426 | } else { |
| 1446 | partitionIndex = strm.ReadBits(10); | 1427 | partitionIndex = strm.ReadBits<10>(); |
| 1447 | baseCEM = strm.ReadBits(6); | 1428 | baseCEM = strm.ReadBits<6>(); |
| 1448 | } | 1429 | } |
| 1449 | uint32_t baseMode = (baseCEM & 3); | 1430 | u32 baseMode = (baseCEM & 3); |
| 1450 | 1431 | ||
| 1451 | // Remaining bits are color endpoint data... | 1432 | // Remaining bits are color endpos32 data... |
| 1452 | uint32_t nWeightBits = weightParams.GetPackedBitSize(); | 1433 | u32 nWeightBits = weightParams.GetPackedBitSize(); |
| 1453 | int32_t remainingBits = 128 - nWeightBits - strm.GetBitsRead(); | 1434 | s32 remainingBits = 128 - nWeightBits - static_cast<s32>(strm.GetBitsRead()); |
| 1454 | 1435 | ||
| 1455 | // Consider extra bits prior to texel data... | 1436 | // Consider extra bits prior to texel data... |
| 1456 | uint32_t extraCEMbits = 0; | 1437 | u32 extraCEMbits = 0; |
| 1457 | if (baseMode) { | 1438 | if (baseMode) { |
| 1458 | switch (nPartitions) { | 1439 | switch (nPartitions) { |
| 1459 | case 2: | 1440 | case 2: |
| @@ -1473,18 +1454,18 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1473 | remainingBits -= extraCEMbits; | 1454 | remainingBits -= extraCEMbits; |
| 1474 | 1455 | ||
| 1475 | // Do we have a dual plane situation? | 1456 | // Do we have a dual plane situation? |
| 1476 | uint32_t planeSelectorBits = 0; | 1457 | u32 planeSelectorBits = 0; |
| 1477 | if (weightParams.m_bDualPlane) { | 1458 | if (weightParams.m_bDualPlane) { |
| 1478 | planeSelectorBits = 2; | 1459 | planeSelectorBits = 2; |
| 1479 | } | 1460 | } |
| 1480 | remainingBits -= planeSelectorBits; | 1461 | remainingBits -= planeSelectorBits; |
| 1481 | 1462 | ||
| 1482 | // Read color data... | 1463 | // Read color data... |
| 1483 | uint32_t colorDataBits = remainingBits; | 1464 | u32 colorDataBits = remainingBits; |
| 1484 | while (remainingBits > 0) { | 1465 | while (remainingBits > 0) { |
| 1485 | uint32_t nb = std::min(remainingBits, 8); | 1466 | u32 nb = std::min(remainingBits, 8); |
| 1486 | uint32_t b = strm.ReadBits(nb); | 1467 | u32 b = strm.ReadBits(nb); |
| 1487 | colorEndpointStream.WriteBits(b, nb); | 1468 | colorEndpos32Stream.WriteBits(b, nb); |
| 1488 | remainingBits -= 8; | 1469 | remainingBits -= 8; |
| 1489 | } | 1470 | } |
| 1490 | 1471 | ||
| @@ -1493,64 +1474,64 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1493 | 1474 | ||
| 1494 | // Read the rest of the CEM | 1475 | // Read the rest of the CEM |
| 1495 | if (baseMode) { | 1476 | if (baseMode) { |
| 1496 | uint32_t extraCEM = strm.ReadBits(extraCEMbits); | 1477 | u32 extraCEM = strm.ReadBits(extraCEMbits); |
| 1497 | uint32_t CEM = (extraCEM << 6) | baseCEM; | 1478 | u32 CEM = (extraCEM << 6) | baseCEM; |
| 1498 | CEM >>= 2; | 1479 | CEM >>= 2; |
| 1499 | 1480 | ||
| 1500 | bool C[4] = {0}; | 1481 | bool C[4] = {0}; |
| 1501 | for (uint32_t i = 0; i < nPartitions; i++) { | 1482 | for (u32 i = 0; i < nPartitions; i++) { |
| 1502 | C[i] = CEM & 1; | 1483 | C[i] = CEM & 1; |
| 1503 | CEM >>= 1; | 1484 | CEM >>= 1; |
| 1504 | } | 1485 | } |
| 1505 | 1486 | ||
| 1506 | uint8_t M[4] = {0}; | 1487 | u8 M[4] = {0}; |
| 1507 | for (uint32_t i = 0; i < nPartitions; i++) { | 1488 | for (u32 i = 0; i < nPartitions; i++) { |
| 1508 | M[i] = CEM & 3; | 1489 | M[i] = CEM & 3; |
| 1509 | CEM >>= 2; | 1490 | CEM >>= 2; |
| 1510 | assert(M[i] <= 3); | 1491 | assert(M[i] <= 3); |
| 1511 | } | 1492 | } |
| 1512 | 1493 | ||
| 1513 | for (uint32_t i = 0; i < nPartitions; i++) { | 1494 | for (u32 i = 0; i < nPartitions; i++) { |
| 1514 | colorEndpointMode[i] = baseMode; | 1495 | colorEndpos32Mode[i] = baseMode; |
| 1515 | if (!(C[i])) | 1496 | if (!(C[i])) |
| 1516 | colorEndpointMode[i] -= 1; | 1497 | colorEndpos32Mode[i] -= 1; |
| 1517 | colorEndpointMode[i] <<= 2; | 1498 | colorEndpos32Mode[i] <<= 2; |
| 1518 | colorEndpointMode[i] |= M[i]; | 1499 | colorEndpos32Mode[i] |= M[i]; |
| 1519 | } | 1500 | } |
| 1520 | } else if (nPartitions > 1) { | 1501 | } else if (nPartitions > 1) { |
| 1521 | uint32_t CEM = baseCEM >> 2; | 1502 | u32 CEM = baseCEM >> 2; |
| 1522 | for (uint32_t i = 0; i < nPartitions; i++) { | 1503 | for (u32 i = 0; i < nPartitions; i++) { |
| 1523 | colorEndpointMode[i] = CEM; | 1504 | colorEndpos32Mode[i] = CEM; |
| 1524 | } | 1505 | } |
| 1525 | } | 1506 | } |
| 1526 | 1507 | ||
| 1527 | // Make sure everything up till here is sane. | 1508 | // Make sure everything up till here is sane. |
| 1528 | for (uint32_t i = 0; i < nPartitions; i++) { | 1509 | for (u32 i = 0; i < nPartitions; i++) { |
| 1529 | assert(colorEndpointMode[i] < 16); | 1510 | assert(colorEndpos32Mode[i] < 16); |
| 1530 | } | 1511 | } |
| 1531 | assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128); | 1512 | assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128); |
| 1532 | 1513 | ||
| 1533 | // Decode both color data and texel weight data | 1514 | // Decode both color data and texel weight data |
| 1534 | uint32_t colorValues[32]; // Four values, two endpoints, four maximum paritions | 1515 | u32 colorValues[32]; // Four values, two endpos32s, four maximum paritions |
| 1535 | DecodeColorValues(colorValues, colorEndpointData, colorEndpointMode, nPartitions, | 1516 | DecodeColorValues(colorValues, colorEndpos32Data, colorEndpos32Mode, nPartitions, |
| 1536 | colorDataBits); | 1517 | colorDataBits); |
| 1537 | 1518 | ||
| 1538 | Pixel endpoints[4][2]; | 1519 | Pixel endpos32s[4][2]; |
| 1539 | const uint32_t* colorValuesPtr = colorValues; | 1520 | const u32* colorValuesPtr = colorValues; |
| 1540 | for (uint32_t i = 0; i < nPartitions; i++) { | 1521 | for (u32 i = 0; i < nPartitions; i++) { |
| 1541 | ComputeEndpoints(endpoints[i][0], endpoints[i][1], colorValuesPtr, colorEndpointMode[i]); | 1522 | ComputeEndpos32s(endpos32s[i][0], endpos32s[i][1], colorValuesPtr, colorEndpos32Mode[i]); |
| 1542 | } | 1523 | } |
| 1543 | 1524 | ||
| 1544 | // Read the texel weight data.. | 1525 | // Read the texel weight data.. |
| 1545 | uint8_t texelWeightData[16]; | 1526 | u8 texelWeightData[16]; |
| 1546 | memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); | 1527 | memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); |
| 1547 | 1528 | ||
| 1548 | // Reverse everything | 1529 | // Reverse everything |
| 1549 | for (uint32_t i = 0; i < 8; i++) { | 1530 | for (u32 i = 0; i < 8; i++) { |
| 1550 | // Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits | 1531 | // Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits |
| 1551 | #define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32 | 1532 | #define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32 |
| 1552 | unsigned char a = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[i])); | 1533 | u8 a = static_cast<u8>(REVERSE_BYTE(texelWeightData[i])); |
| 1553 | unsigned char b = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[15 - i])); | 1534 | u8 b = static_cast<u8>(REVERSE_BYTE(texelWeightData[15 - i])); |
| 1554 | #undef REVERSE_BYTE | 1535 | #undef REVERSE_BYTE |
| 1555 | 1536 | ||
| 1556 | texelWeightData[i] = b; | 1537 | texelWeightData[i] = b; |
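The texel weight bits are stored starting from the opposite end of the 128-bit block, so the loop above reverses the 16 bytes and each byte's bits using the Stanford bit hack cited in the comment (the patch only changes unsigned char to u8 here). The trick in isolation:

    #include <cstdint>

    // Reverse the bits of one byte with the 64-bit multiply trick,
    // e.g. ReverseByte(0x01) == 0x80.
    // http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits
    static uint8_t ReverseByte(uint8_t b) {
        return static_cast<uint8_t>(((b * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32);
    }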
| @@ -1558,50 +1539,51 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1558 | } | 1539 | } |
| 1559 | 1540 | ||
| 1560 | // Make sure that higher non-texel bits are set to zero | 1541 | // Make sure that higher non-texel bits are set to zero |
| 1561 | const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; | 1542 | const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; |
| 1562 | texelWeightData[clearByteStart - 1] = | 1543 | texelWeightData[clearByteStart - 1] = |
| 1563 | texelWeightData[clearByteStart - 1] & | 1544 | texelWeightData[clearByteStart - 1] & |
| 1564 | static_cast<uint8_t>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); | 1545 | static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); |
| 1565 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); | 1546 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); |
| 1566 | 1547 | ||
| 1567 | std::vector<IntegerEncodedValue> texelWeightValues; | 1548 | std::vector<IntegerEncodedValue> texelWeightValues; |
| 1549 | texelWeightValues.reserve(64); | ||
| 1550 | |||
| 1568 | InputBitStream weightStream(texelWeightData); | 1551 | InputBitStream weightStream(texelWeightData); |
| 1569 | 1552 | ||
| 1570 | IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, | 1553 | DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, |
| 1571 | weightParams.m_MaxWeight, | 1554 | weightParams.GetNumWeightValues()); |
| 1572 | weightParams.GetNumWeightValues()); | ||
| 1573 | 1555 | ||
| 1574 | // Blocks can be at most 12x12, so we can have as many as 144 weights | 1556 | // Blocks can be at most 12x12, so we can have as many as 144 weights |
| 1575 | uint32_t weights[2][144]; | 1557 | u32 weights[2][144]; |
| 1576 | UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight); | 1558 | UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight); |
| 1577 | 1559 | ||
| 1578 | // Now that we have endpoints and weights, we can interpolate and generate | 1560 | // Now that we have endpos32s and weights, we can s32erpolate and generate |
| 1579 | // the proper decoding... | 1561 | // the proper decoding... |
| 1580 | for (uint32_t j = 0; j < blockHeight; j++) | 1562 | for (u32 j = 0; j < blockHeight; j++) |
| 1581 | for (uint32_t i = 0; i < blockWidth; i++) { | 1563 | for (u32 i = 0; i < blockWidth; i++) { |
| 1582 | uint32_t partition = Select2DPartition(partitionIndex, i, j, nPartitions, | 1564 | u32 partition = Select2DPartition(partitionIndex, i, j, nPartitions, |
| 1583 | (blockHeight * blockWidth) < 32); | 1565 | (blockHeight * blockWidth) < 32); |
| 1584 | assert(partition < nPartitions); | 1566 | assert(partition < nPartitions); |
| 1585 | 1567 | ||
| 1586 | Pixel p; | 1568 | Pixel p; |
| 1587 | for (uint32_t c = 0; c < 4; c++) { | 1569 | for (u32 c = 0; c < 4; c++) { |
| 1588 | uint32_t C0 = endpoints[partition][0].Component(c); | 1570 | u32 C0 = endpos32s[partition][0].Component(c); |
| 1589 | C0 = Replicate(C0, 8, 16); | 1571 | C0 = Replicate(C0, 8, 16); |
| 1590 | uint32_t C1 = endpoints[partition][1].Component(c); | 1572 | u32 C1 = endpos32s[partition][1].Component(c); |
| 1591 | C1 = Replicate(C1, 8, 16); | 1573 | C1 = Replicate(C1, 8, 16); |
| 1592 | 1574 | ||
| 1593 | uint32_t plane = 0; | 1575 | u32 plane = 0; |
| 1594 | if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { | 1576 | if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { |
| 1595 | plane = 1; | 1577 | plane = 1; |
| 1596 | } | 1578 | } |
| 1597 | 1579 | ||
| 1598 | uint32_t weight = weights[plane][j * blockWidth + i]; | 1580 | u32 weight = weights[plane][j * blockWidth + i]; |
| 1599 | uint32_t C = (C0 * (64 - weight) + C1 * weight + 32) / 64; | 1581 | u32 C = (C0 * (64 - weight) + C1 * weight + 32) / 64; |
| 1600 | if (C == 65535) { | 1582 | if (C == 65535) { |
| 1601 | p.Component(c) = 255; | 1583 | p.Component(c) = 255; |
| 1602 | } else { | 1584 | } else { |
| 1603 | double Cf = static_cast<double>(C); | 1585 | double Cf = static_cast<double>(C); |
| 1604 | p.Component(c) = static_cast<uint16_t>(255.0 * (Cf / 65536.0) + 0.5); | 1586 | p.Component(c) = static_cast<u16>(255.0 * (Cf / 65536.0) + 0.5); |
| 1605 | } | 1587 | } |
| 1606 | } | 1588 | } |
| 1607 | 1589 | ||
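The inner loop widens each 8-bit endpoint channel to 16 bits (Replicate(x, 8, 16) is just x * 0x101), blends the two endpoints with the 0..64 weight, and maps the 16-bit result back to a byte, treating a result of 65535 as exactly 255. One channel of that computation, extracted:

    #include <cstdint>

    // Interpolate one channel between two 8-bit endpoints with a 0..64 weight,
    // mirroring the loop above.
    static uint8_t InterpolateChannel(uint8_t e0, uint8_t e1, uint32_t weight) {
        const uint32_t c0 = e0 * 0x101u; // Replicate(e0, 8, 16)
        const uint32_t c1 = e1 * 0x101u;
        const uint32_t c = (c0 * (64 - weight) + c1 * weight + 32) / 64;
        if (c == 65535)
            return 255;
        return static_cast<uint8_t>(255.0 * (static_cast<double>(c) / 65536.0) + 0.5);
    }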
| @@ -1613,26 +1595,26 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1613 | 1595 | ||
| 1614 | namespace Tegra::Texture::ASTC { | 1596 | namespace Tegra::Texture::ASTC { |
| 1615 | 1597 | ||
| 1616 | std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, | 1598 | std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width, |
| 1617 | uint32_t depth, uint32_t block_width, uint32_t block_height) { | 1599 | u32 block_height) { |
| 1618 | uint32_t blockIdx = 0; | 1600 | u32 blockIdx = 0; |
| 1619 | std::size_t depth_offset = 0; | 1601 | std::size_t depth_offset = 0; |
| 1620 | std::vector<uint8_t> outData(height * width * depth * 4); | 1602 | std::vector<u8> outData(height * width * depth * 4); |
| 1621 | for (uint32_t k = 0; k < depth; k++) { | 1603 | for (u32 k = 0; k < depth; k++) { |
| 1622 | for (uint32_t j = 0; j < height; j += block_height) { | 1604 | for (u32 j = 0; j < height; j += block_height) { |
| 1623 | for (uint32_t i = 0; i < width; i += block_width) { | 1605 | for (u32 i = 0; i < width; i += block_width) { |
| 1624 | 1606 | ||
| 1625 | const uint8_t* blockPtr = data + blockIdx * 16; | 1607 | const u8* blockPtr = data + blockIdx * 16; |
| 1626 | 1608 | ||
| 1627 | // Blocks can be at most 12x12 | 1609 | // Blocks can be at most 12x12 |
| 1628 | uint32_t uncompData[144]; | 1610 | u32 uncompData[144]; |
| 1629 | ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); | 1611 | ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); |
| 1630 | 1612 | ||
| 1631 | uint32_t decompWidth = std::min(block_width, width - i); | 1613 | u32 decompWidth = std::min(block_width, width - i); |
| 1632 | uint32_t decompHeight = std::min(block_height, height - j); | 1614 | u32 decompHeight = std::min(block_height, height - j); |
| 1633 | 1615 | ||
| 1634 | uint8_t* outRow = depth_offset + outData.data() + (j * width + i) * 4; | 1616 | u8* outRow = depth_offset + outData.data() + (j * width + i) * 4; |
| 1635 | for (uint32_t jj = 0; jj < decompHeight; jj++) { | 1617 | for (u32 jj = 0; jj < decompHeight; jj++) { |
| 1636 | memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); | 1618 | memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); |
| 1637 | } | 1619 | } |
| 1638 | 1620 | ||
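The exported Decompress walks the image in block_height by block_width tiles, decodes each 16-byte block into up to 12x12 RGBA8 texels, and copies only the rows and columns that fall inside the image. A hedged usage sketch of the new signature (u8 is yuzu's alias for uint8_t; the buffer below is a zero-filled placeholder, since real data comes from guest memory and all-zero blocks decode to the error colour, asserting in debug builds):

    #include <cstdint>
    #include <vector>
    // Decompress() is declared alongside this file in video_core/textures/astc.h.

    // Decode a 64x64, single-layer ASTC 8x8 texture into RGBA8.
    std::vector<u8> compressed((64 / 8) * (64 / 8) * 16, 0); // 16 bytes per block
    std::vector<u8> rgba =
        Tegra::Texture::ASTC::Decompress(compressed.data(), 64, 64, 1, 8, 8);
    // rgba now holds 64 * 64 * 4 bytes, four bytes (R, G, B, A) per texel.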
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index c38860628..e26af33b3 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -698,6 +698,8 @@ void Config::ReadSystemValues() { | |||
| 698 | Settings::values.custom_rtc = std::nullopt; | 698 | Settings::values.custom_rtc = std::nullopt; |
| 699 | } | 699 | } |
| 700 | 700 | ||
| 701 | Settings::values.sound_index = ReadSetting(QStringLiteral("sound_index"), 1).toInt(); | ||
| 702 | |||
| 701 | qt_config->endGroup(); | 703 | qt_config->endGroup(); |
| 702 | } | 704 | } |
| 703 | 705 | ||
| @@ -1125,6 +1127,8 @@ void Config::SaveSystemValues() { | |||
| 1125 | Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()), | 1127 | Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()), |
| 1126 | 0); | 1128 | 0); |
| 1127 | 1129 | ||
| 1130 | WriteSetting(QStringLiteral("sound_index"), Settings::values.sound_index, 1); | ||
| 1131 | |||
| 1128 | qt_config->endGroup(); | 1132 | qt_config->endGroup(); |
| 1129 | } | 1133 | } |
| 1130 | 1134 | ||
diff --git a/src/yuzu/configuration/configure_system.cpp b/src/yuzu/configuration/configure_system.cpp index e1b52f8d9..f9a5b4fbe 100644 --- a/src/yuzu/configuration/configure_system.cpp +++ b/src/yuzu/configuration/configure_system.cpp | |||
| @@ -56,6 +56,7 @@ void ConfigureSystem::SetConfiguration() { | |||
| 56 | enabled = !Core::System::GetInstance().IsPoweredOn(); | 56 | enabled = !Core::System::GetInstance().IsPoweredOn(); |
| 57 | 57 | ||
| 58 | ui->combo_language->setCurrentIndex(Settings::values.language_index); | 58 | ui->combo_language->setCurrentIndex(Settings::values.language_index); |
| 59 | ui->combo_sound->setCurrentIndex(Settings::values.sound_index); | ||
| 59 | 60 | ||
| 60 | ui->rng_seed_checkbox->setChecked(Settings::values.rng_seed.has_value()); | 61 | ui->rng_seed_checkbox->setChecked(Settings::values.rng_seed.has_value()); |
| 61 | ui->rng_seed_edit->setEnabled(Settings::values.rng_seed.has_value()); | 62 | ui->rng_seed_edit->setEnabled(Settings::values.rng_seed.has_value()); |
| @@ -81,6 +82,7 @@ void ConfigureSystem::ApplyConfiguration() { | |||
| 81 | } | 82 | } |
| 82 | 83 | ||
| 83 | Settings::values.language_index = ui->combo_language->currentIndex(); | 84 | Settings::values.language_index = ui->combo_language->currentIndex(); |
| 85 | Settings::values.sound_index = ui->combo_sound->currentIndex(); | ||
| 84 | 86 | ||
| 85 | if (ui->rng_seed_checkbox->isChecked()) { | 87 | if (ui->rng_seed_checkbox->isChecked()) { |
| 86 | Settings::values.rng_seed = ui->rng_seed_edit->text().toULongLong(nullptr, 16); | 88 | Settings::values.rng_seed = ui->rng_seed_edit->text().toULongLong(nullptr, 16); |
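The config.cpp and configure_system.cpp hunks together wire the new sound_index setting end to end: persist it under the System group with a default of 1, load it into the sound combo box when the dialog opens, and write the chosen index back when the dialog is applied. Condensed into one place for reference (identifiers exactly as in the hunks, surrounding class context elided):

    // config.cpp: ReadSystemValues() / SaveSystemValues()
    Settings::values.sound_index = ReadSetting(QStringLiteral("sound_index"), 1).toInt();
    WriteSetting(QStringLiteral("sound_index"), Settings::values.sound_index, 1);

    // configure_system.cpp: SetConfiguration() / ApplyConfiguration()
    ui->combo_sound->setCurrentIndex(Settings::values.sound_index);
    Settings::values.sound_index = ui->combo_sound->currentIndex();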
diff --git a/src/yuzu/loading_screen.cpp b/src/yuzu/loading_screen.cpp index 4f2bfab48..2a6483370 100644 --- a/src/yuzu/loading_screen.cpp +++ b/src/yuzu/loading_screen.cpp | |||
| @@ -34,18 +34,6 @@ constexpr char PROGRESSBAR_STYLE_PREPARE[] = R"( | |||
| 34 | QProgressBar {} | 34 | QProgressBar {} |
| 35 | QProgressBar::chunk {})"; | 35 | QProgressBar::chunk {})"; |
| 36 | 36 | ||
| 37 | constexpr char PROGRESSBAR_STYLE_DECOMPILE[] = R"( | ||
| 38 | QProgressBar { | ||
| 39 | background-color: black; | ||
| 40 | border: 2px solid white; | ||
| 41 | border-radius: 4px; | ||
| 42 | padding: 2px; | ||
| 43 | } | ||
| 44 | QProgressBar::chunk { | ||
| 45 | background-color: #0ab9e6; | ||
| 46 | width: 1px; | ||
| 47 | })"; | ||
| 48 | |||
| 49 | constexpr char PROGRESSBAR_STYLE_BUILD[] = R"( | 37 | constexpr char PROGRESSBAR_STYLE_BUILD[] = R"( |
| 50 | QProgressBar { | 38 | QProgressBar { |
| 51 | background-color: black; | 39 | background-color: black; |
| @@ -100,13 +88,11 @@ LoadingScreen::LoadingScreen(QWidget* parent) | |||
| 100 | 88 | ||
| 101 | stage_translations = { | 89 | stage_translations = { |
| 102 | {VideoCore::LoadCallbackStage::Prepare, tr("Loading...")}, | 90 | {VideoCore::LoadCallbackStage::Prepare, tr("Loading...")}, |
| 103 | {VideoCore::LoadCallbackStage::Decompile, tr("Preparing Shaders %1 / %2")}, | ||
| 104 | {VideoCore::LoadCallbackStage::Build, tr("Loading Shaders %1 / %2")}, | 91 | {VideoCore::LoadCallbackStage::Build, tr("Loading Shaders %1 / %2")}, |
| 105 | {VideoCore::LoadCallbackStage::Complete, tr("Launching...")}, | 92 | {VideoCore::LoadCallbackStage::Complete, tr("Launching...")}, |
| 106 | }; | 93 | }; |
| 107 | progressbar_style = { | 94 | progressbar_style = { |
| 108 | {VideoCore::LoadCallbackStage::Prepare, PROGRESSBAR_STYLE_PREPARE}, | 95 | {VideoCore::LoadCallbackStage::Prepare, PROGRESSBAR_STYLE_PREPARE}, |
| 109 | {VideoCore::LoadCallbackStage::Decompile, PROGRESSBAR_STYLE_DECOMPILE}, | ||
| 110 | {VideoCore::LoadCallbackStage::Build, PROGRESSBAR_STYLE_BUILD}, | 96 | {VideoCore::LoadCallbackStage::Build, PROGRESSBAR_STYLE_BUILD}, |
| 111 | {VideoCore::LoadCallbackStage::Complete, PROGRESSBAR_STYLE_COMPLETE}, | 97 | {VideoCore::LoadCallbackStage::Complete, PROGRESSBAR_STYLE_COMPLETE}, |
| 112 | }; | 98 | }; |
| @@ -192,8 +178,7 @@ void LoadingScreen::OnLoadProgress(VideoCore::LoadCallbackStage stage, std::size | |||
| 192 | } | 178 | } |
| 193 | 179 | ||
| 194 | // update labels and progress bar | 180 | // update labels and progress bar |
| 195 | if (stage == VideoCore::LoadCallbackStage::Decompile || | 181 | if (stage == VideoCore::LoadCallbackStage::Build) { |
| 196 | stage == VideoCore::LoadCallbackStage::Build) { | ||
| 197 | ui->stage->setText(stage_translations[stage].arg(value).arg(total)); | 182 | ui->stage->setText(stage_translations[stage].arg(value).arg(total)); |
| 198 | } else { | 183 | } else { |
| 199 | ui->stage->setText(stage_translations[stage]); | 184 | ui->stage->setText(stage_translations[stage]); |