diff options
| author | 2021-06-16 03:03:08 -0300 | |
|---|---|---|
| committer | 2021-07-22 21:51:38 -0400 | |
| commit | cbbca26d182991abf68d9b2e1b1e5935bf4eb476 (patch) | |
| tree | 09662bca5ff6bfa688e409294562ed1cdd8754b2 | |
| parent | shader: Rename maxwell/program.h to translate_program.h (diff) | |
| download | yuzu-cbbca26d182991abf68d9b2e1b1e5935bf4eb476.tar.gz yuzu-cbbca26d182991abf68d9b2e1b1e5935bf4eb476.tar.xz yuzu-cbbca26d182991abf68d9b2e1b1e5935bf4eb476.zip | |
shader: Add support for native 16-bit floats
9 files changed, 50 insertions, 14 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index f801a9f72..164e94071 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt | |||
| @@ -211,6 +211,7 @@ add_library(shader_recompiler STATIC | |||
| 211 | frontend/maxwell/translate/translate.h | 211 | frontend/maxwell/translate/translate.h |
| 212 | frontend/maxwell/translate_program.cpp | 212 | frontend/maxwell/translate_program.cpp |
| 213 | frontend/maxwell/translate_program.h | 213 | frontend/maxwell/translate_program.h |
| 214 | host_translate_info.h | ||
| 214 | ir_opt/collect_shader_info_pass.cpp | 215 | ir_opt/collect_shader_info_pass.cpp |
| 215 | ir_opt/constant_propagation_pass.cpp | 216 | ir_opt/constant_propagation_pass.cpp |
| 216 | ir_opt/dead_code_elimination_pass.cpp | 217 | ir_opt/dead_code_elimination_pass.cpp |
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index e52170e3e..5250509c1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" | 13 | #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" |
| 14 | #include "shader_recompiler/frontend/maxwell/translate/translate.h" | 14 | #include "shader_recompiler/frontend/maxwell/translate/translate.h" |
| 15 | #include "shader_recompiler/frontend/maxwell/translate_program.h" | 15 | #include "shader_recompiler/frontend/maxwell/translate_program.h" |
| 16 | #include "shader_recompiler/host_translate_info.h" | ||
| 16 | #include "shader_recompiler/ir_opt/passes.h" | 17 | #include "shader_recompiler/ir_opt/passes.h" |
| 17 | 18 | ||
| 18 | namespace Shader::Maxwell { | 19 | namespace Shader::Maxwell { |
| @@ -120,7 +121,7 @@ void AddNVNStorageBuffers(IR::Program& program) { | |||
| 120 | } // Anonymous namespace | 121 | } // Anonymous namespace |
| 121 | 122 | ||
| 122 | IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, | 123 | IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, |
| 123 | Environment& env, Flow::CFG& cfg) { | 124 | Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { |
| 124 | IR::Program program; | 125 | IR::Program program; |
| 125 | program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); | 126 | program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); |
| 126 | program.blocks = GenerateBlocks(program.syntax_list); | 127 | program.blocks = GenerateBlocks(program.syntax_list); |
| @@ -150,8 +151,9 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | |||
| 150 | RemoveUnreachableBlocks(program); | 151 | RemoveUnreachableBlocks(program); |
| 151 | 152 | ||
| 152 | // Replace instructions before the SSA rewrite | 153 | // Replace instructions before the SSA rewrite |
| 153 | Optimization::LowerFp16ToFp32(program); | 154 | if (!host_info.support_float16) { |
| 154 | | 155 | Optimization::LowerFp16ToFp32(program); |
| 156 | } | ||
| 155 | Optimization::SsaRewritePass(program); | 157 | Optimization::SsaRewritePass(program); |
| 156 | 158 | ||
| 157 | Optimization::GlobalMemoryToStorageBufferPass(program); | 159 | Optimization::GlobalMemoryToStorageBufferPass(program); |
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h index 1e5536443..a84814811 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.h +++ b/src/shader_recompiler/frontend/maxwell/translate_program.h | |||
| @@ -8,13 +8,14 @@ | |||
| 8 | #include "shader_recompiler/frontend/ir/basic_block.h" | 8 | #include "shader_recompiler/frontend/ir/basic_block.h" |
| 9 | #include "shader_recompiler/frontend/ir/program.h" | 9 | #include "shader_recompiler/frontend/ir/program.h" |
| 10 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | 10 | #include "shader_recompiler/frontend/maxwell/control_flow.h" |
| 11 | #include "shader_recompiler/host_translate_info.h" | ||
| 11 | #include "shader_recompiler/object_pool.h" | 12 | #include "shader_recompiler/object_pool.h" |
| 12 | 13 | ||
| 13 | namespace Shader::Maxwell { | 14 | namespace Shader::Maxwell { |
| 14 | 15 | ||
| 15 | [[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, | 16 | [[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, |
| 16 | ObjectPool<IR::Block>& block_pool, Environment& env, | 17 | ObjectPool<IR::Block>& block_pool, Environment& env, |
| 17 | Flow::CFG& cfg); | 18 | Flow::CFG& cfg, const HostTranslateInfo& host_info); |
| 18 | 19 | ||
| 19 | [[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, | 20 | [[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, |
| 20 | Environment& env_vertex_b); | 21 | Environment& env_vertex_b); |
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h new file mode 100644 index 000000000..94a584219 --- /dev/null +++ b/src/shader_recompiler/host_translate_info.h | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | namespace Shader { | ||
| 8 | |||
| 9 | // Try to keep entries here to a minimum | ||
| 10 | // They can accidentally change the cached information in a shader | ||
| 11 | |||
| 12 | /// Misc information about the host | ||
| 13 | struct HostTranslateInfo { | ||
| 14 | bool support_float16{}; ///< True when the device supports 16-bit floats | ||
| 15 | bool support_int64{}; ///< True when the device supports 64-bit integers | ||
| 16 | }; | ||
| 17 | |||
| 18 | } // namespace Shader | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c05cd5d28..b459397f5 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -201,6 +201,10 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo | |||
| 201 | .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), | 201 | .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), |
| 202 | .has_gl_precise_bug = device.HasPreciseBug(), | 202 | .has_gl_precise_bug = device.HasPreciseBug(), |
| 203 | .ignore_nan_fp_comparisons = true, | 203 | .ignore_nan_fp_comparisons = true, |
| 204 | }, | ||
| 205 | host_info{ | ||
| 206 | .support_float16 = false, | ||
| 207 | .support_int64 = true, | ||
| 204 | } { | 208 | } { |
| 205 | if (use_asynchronous_shaders) { | 209 | if (use_asynchronous_shaders) { |
| 206 | workers = CreateWorkers(); | 210 | workers = CreateWorkers(); |
| @@ -373,15 +377,15 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( | |||
| 373 | Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); | 377 | Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); |
| 374 | if (!uses_vertex_a || index != 1) { | 378 | if (!uses_vertex_a || index != 1) { |
| 375 | // Normal path | 379 | // Normal path |
| 376 | programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); | 380 | programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); |
| 377 | 381 | ||
| 378 | for (const auto& desc : programs[index].info.storage_buffers_descriptors) { | 382 | for (const auto& desc : programs[index].info.storage_buffers_descriptors) { |
| 379 | total_storage_buffers += desc.count; | 383 | total_storage_buffers += desc.count; |
| 380 | } | 384 | } |
| 381 | } else { | 385 | } else { |
| 382 | // VertexB path when VertexA is present. | 386 | // VertexB path when VertexA is present. |
| 383 | Shader::IR::Program& program_va{programs[0]}; | 387 | auto& program_va{programs[0]}; |
| 384 | Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; | 388 | auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; |
| 385 | for (const auto& desc : program_vb.info.storage_buffers_descriptors) { | 389 | for (const auto& desc : program_vb.info.storage_buffers_descriptors) { |
| 386 | total_storage_buffers += desc.count; | 390 | total_storage_buffers += desc.count; |
| 387 | } | 391 | } |
| @@ -449,7 +453,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( | |||
| 449 | LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); | 453 | LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); |
| 450 | 454 | ||
| 451 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; | 455 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; |
| 452 | Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; | 456 | auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; |
| 453 | 457 | ||
| 454 | u32 num_storage_buffers{}; | 458 | u32 num_storage_buffers{}; |
| 455 | for (const auto& desc : program.info.storage_buffers_descriptors) { | 459 | for (const auto& desc : program.info.storage_buffers_descriptors) { |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index d24b54d90..6952a1f2c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 15 | #include "common/thread_worker.h" | 15 | #include "common/thread_worker.h" |
| 16 | #include "shader_recompiler/frontend/ir/value.h" | 16 | #include "shader_recompiler/frontend/ir/value.h" |
| 17 | #include "shader_recompiler/host_translate_info.h" | ||
| 17 | #include "shader_recompiler/object_pool.h" | 18 | #include "shader_recompiler/object_pool.h" |
| 18 | #include "video_core/engines/shader_type.h" | 19 | #include "video_core/engines/shader_type.h" |
| 19 | #include "video_core/renderer_opengl/gl_compute_pipeline.h" | 20 | #include "video_core/renderer_opengl/gl_compute_pipeline.h" |
| @@ -82,6 +83,8 @@ private: | |||
| 82 | std::unordered_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_cache; | 83 | std::unordered_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_cache; |
| 83 | 84 | ||
| 84 | Shader::Profile profile; | 85 | Shader::Profile profile; |
| 86 | Shader::HostTranslateInfo host_info; | ||
| 87 | |||
| 85 | std::filesystem::path shader_cache_filename; | 88 | std::filesystem::path shader_cache_filename; |
| 86 | std::unique_ptr<ShaderWorker> workers; | 89 | std::unique_ptr<ShaderWorker> workers; |
| 87 | }; | 90 | }; |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0b6fe8e2e..72e6f4207 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -307,6 +307,10 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw | |||
| 307 | .has_broken_signed_operations = false, | 307 | .has_broken_signed_operations = false, |
| 308 | .ignore_nan_fp_comparisons = false, | 308 | .ignore_nan_fp_comparisons = false, |
| 309 | }; | 309 | }; |
| 310 | host_info = Shader::HostTranslateInfo{ | ||
| 311 | .support_float16 = device.IsFloat16Supported(), | ||
| 312 | .support_int64 = true, | ||
| 313 | }; | ||
| 310 | } | 314 | } |
| 311 | 315 | ||
| 312 | PipelineCache::~PipelineCache() = default; | 316 | PipelineCache::~PipelineCache() = default; |
| @@ -484,11 +488,11 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( | |||
| 484 | Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); | 488 | Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); |
| 485 | if (!uses_vertex_a || index != 1) { | 489 | if (!uses_vertex_a || index != 1) { |
| 486 | // Normal path | 490 | // Normal path |
| 487 | programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); | 491 | programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); |
| 488 | } else { | 492 | } else { |
| 489 | // VertexB path when VertexA is present. | 493 | // VertexB path when VertexA is present. |
| 490 | Shader::IR::Program& program_va{programs[0]}; | 494 | auto& program_va{programs[0]}; |
| 491 | Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; | 495 | auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; |
| 492 | programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); | 496 | programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); |
| 493 | } | 497 | } |
| 494 | } | 498 | } |
| @@ -575,7 +579,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( | |||
| 575 | LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); | 579 | LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); |
| 576 | 580 | ||
| 577 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; | 581 | Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; |
| 578 | Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; | 582 | auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; |
| 579 | const std::vector<u32> code{EmitSPIRV(profile, program)}; | 583 | const std::vector<u32> code{EmitSPIRV(profile, program)}; |
| 580 | device.SaveShader(code); | 584 | device.SaveShader(code); |
| 581 | vk::ShaderModule spv_module{BuildShader(device, code)}; | 585 | vk::ShaderModule spv_module{BuildShader(device, code)}; |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 167a2ee2e..42da2960b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include "shader_recompiler/frontend/ir/basic_block.h" | 19 | #include "shader_recompiler/frontend/ir/basic_block.h" |
| 20 | #include "shader_recompiler/frontend/ir/value.h" | 20 | #include "shader_recompiler/frontend/ir/value.h" |
| 21 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | 21 | #include "shader_recompiler/frontend/maxwell/control_flow.h" |
| 22 | #include "shader_recompiler/host_translate_info.h" | ||
| 22 | #include "shader_recompiler/object_pool.h" | 23 | #include "shader_recompiler/object_pool.h" |
| 23 | #include "shader_recompiler/profile.h" | 24 | #include "shader_recompiler/profile.h" |
| 24 | #include "video_core/engines/maxwell_3d.h" | 25 | #include "video_core/engines/maxwell_3d.h" |
| @@ -157,6 +158,8 @@ private: | |||
| 157 | ShaderPools main_pools; | 158 | ShaderPools main_pools; |
| 158 | 159 | ||
| 159 | Shader::Profile profile; | 160 | Shader::Profile profile; |
| 161 | Shader::HostTranslateInfo host_info; | ||
| 162 | |||
| 160 | std::filesystem::path pipeline_cache_filename; | 163 | std::filesystem::path pipeline_cache_filename; |
| 161 | 164 | ||
| 162 | Common::ThreadWorker workers; | 165 | Common::ThreadWorker workers; |
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 9754abcf8..0d8c6cd08 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -497,8 +497,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 497 | } | 497 | } |
| 498 | if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { | 498 | if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { |
| 499 | // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. | 499 | // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. |
| 500 | // LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); | 500 | LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); |
| 501 | // is_float16_supported = false; | 501 | is_float16_supported = false; |
| 502 | } | 502 | } |
| 503 | 503 | ||
| 504 | graphics_queue = logical.GetQueue(graphics_family); | 504 | graphics_queue = logical.GetQueue(graphics_family); |