diff options
| author | 2023-04-05 03:02:24 +0200 | |
|---|---|---|
| committer | 2023-04-08 16:12:44 +0200 | |
| commit | fe91066f4673f7a3ee87235f08b72db4910eb01c (patch) | |
| tree | 7228ef7eb04f1f0ceb289d54daeba42d0d2719c8 | |
| parent | shader_recompiler: Add subpixel offset for correct rounding at `ImageGather` (diff) | |
| download | yuzu-fe91066f4673f7a3ee87235f08b72db4910eb01c.tar.gz yuzu-fe91066f4673f7a3ee87235f08b72db4910eb01c.tar.xz yuzu-fe91066f4673f7a3ee87235f08b72db4910eb01c.zip | |
video_core: Enable ImageGather with subpixel offset on Intel
Diffstat (limited to '')
| -rw-r--r-- | src/shader_recompiler/profile.h | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_device.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_device.h | 5 |
7 files changed, 11 insertions, 17 deletions
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 31390e869..9f88fb440 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h | |||
| @@ -52,9 +52,9 @@ struct Profile { | |||
| 52 | bool need_declared_frag_colors{}; | 52 | bool need_declared_frag_colors{}; |
| 53 | /// Prevents fast math optimizations that may cause inaccuracies | 53 | /// Prevents fast math optimizations that may cause inaccuracies |
| 54 | bool need_fastmath_off{}; | 54 | bool need_fastmath_off{}; |
| 55 | /// Some GPU vendors use a lower fixed point format of 16.8 when calculating pixel coordinates | 55 | /// Some GPU vendors use a different rounding precision when calculating texture pixel |
| 56 | /// in the ImageGather instruction than the Maxwell architecture does. Applying an offset does | 56 | /// coordinates with the 16.8 format in the ImageGather instruction than the Maxwell |
| 57 | /// fix this mismatching rounding behaviour. | 57 | /// architecture. Applying an offset does fix this mismatching rounding behaviour. |
| 58 | bool need_gather_subpixel_offset{}; | 58 | bool need_gather_subpixel_offset{}; |
| 59 | 59 | ||
| 60 | /// OpFClamp is broken and OpFMax + OpFMin should be used instead | 60 | /// OpFClamp is broken and OpFMax + OpFMin should be used instead |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index d36a0a7a1..22ed16ebf 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -169,7 +169,6 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { | |||
| 169 | has_draw_texture = GLAD_GL_NV_draw_texture; | 169 | has_draw_texture = GLAD_GL_NV_draw_texture; |
| 170 | warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; | 170 | warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; |
| 171 | need_fastmath_off = is_nvidia; | 171 | need_fastmath_off = is_nvidia; |
| 172 | need_gather_subpixel_offset = is_amd; | ||
| 173 | can_report_memory = GLAD_GL_NVX_gpu_memory_info; | 172 | can_report_memory = GLAD_GL_NVX_gpu_memory_info; |
| 174 | 173 | ||
| 175 | // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive | 174 | // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index e8104c4de..cc0b95f1a 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -160,10 +160,6 @@ public: | |||
| 160 | return need_fastmath_off; | 160 | return need_fastmath_off; |
| 161 | } | 161 | } |
| 162 | 162 | ||
| 163 | bool NeedsGatherSubpixelOffset() const { | ||
| 164 | return need_gather_subpixel_offset; | ||
| 165 | } | ||
| 166 | |||
| 167 | bool HasCbufFtouBug() const { | 163 | bool HasCbufFtouBug() const { |
| 168 | return has_cbuf_ftou_bug; | 164 | return has_cbuf_ftou_bug; |
| 169 | } | 165 | } |
| @@ -180,6 +176,10 @@ public: | |||
| 180 | return vendor_name == "ATI Technologies Inc."; | 176 | return vendor_name == "ATI Technologies Inc."; |
| 181 | } | 177 | } |
| 182 | 178 | ||
| 179 | bool IsIntel() const { | ||
| 180 | return vendor_name == "Intel"; | ||
| 181 | } | ||
| 182 | |||
| 183 | bool CanReportMemoryUsage() const { | 183 | bool CanReportMemoryUsage() const { |
| 184 | return can_report_memory; | 184 | return can_report_memory; |
| 185 | } | 185 | } |
| @@ -229,7 +229,6 @@ private: | |||
| 229 | bool has_draw_texture{}; | 229 | bool has_draw_texture{}; |
| 230 | bool warp_size_potentially_larger_than_guest{}; | 230 | bool warp_size_potentially_larger_than_guest{}; |
| 231 | bool need_fastmath_off{}; | 231 | bool need_fastmath_off{}; |
| 232 | bool need_gather_subpixel_offset{}; | ||
| 233 | bool has_cbuf_ftou_bug{}; | 232 | bool has_cbuf_ftou_bug{}; |
| 234 | bool has_bool_ref_bug{}; | 233 | bool has_bool_ref_bug{}; |
| 235 | bool can_report_memory{}; | 234 | bool can_report_memory{}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index b40aa6f5e..6ecda2984 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -218,7 +218,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo | |||
| 218 | .lower_left_origin_mode = true, | 218 | .lower_left_origin_mode = true, |
| 219 | .need_declared_frag_colors = true, | 219 | .need_declared_frag_colors = true, |
| 220 | .need_fastmath_off = device.NeedsFastmathOff(), | 220 | .need_fastmath_off = device.NeedsFastmathOff(), |
| 221 | .need_gather_subpixel_offset = device.NeedsGatherSubpixelOffset(), | 221 | .need_gather_subpixel_offset = device.IsAmd() || device.IsIntel(), |
| 222 | 222 | ||
| 223 | .has_broken_spirv_clamp = true, | 223 | .has_broken_spirv_clamp = true, |
| 224 | .has_broken_unsigned_image_offsets = true, | 224 | .has_broken_unsigned_image_offsets = true, |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index f51257267..8963b6a66 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -329,7 +329,9 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | |||
| 329 | 329 | ||
| 330 | .lower_left_origin_mode = false, | 330 | .lower_left_origin_mode = false, |
| 331 | .need_declared_frag_colors = false, | 331 | .need_declared_frag_colors = false, |
| 332 | .need_gather_subpixel_offset = device.NeedsGatherSubpixelOffset(), | 332 | .need_gather_subpixel_offset = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || |
| 333 | driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS || | ||
| 334 | driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA, | ||
| 333 | 335 | ||
| 334 | .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS, | 336 | .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS, |
| 335 | .has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY, | 337 | .has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY, |
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 0939b62c9..6f288b3f8 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -431,7 +431,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 431 | "AMD GCN4 and earlier have broken VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT"); | 431 | "AMD GCN4 and earlier have broken VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT"); |
| 432 | has_broken_cube_compatibility = true; | 432 | has_broken_cube_compatibility = true; |
| 433 | } | 433 | } |
| 434 | need_gather_subpixel_offset = true; | ||
| 435 | } | 434 | } |
| 436 | if (extensions.sampler_filter_minmax && is_amd) { | 435 | if (extensions.sampler_filter_minmax && is_amd) { |
| 437 | // Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken. | 436 | // Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken. |
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 50e95bcca..41b5da18a 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -554,10 +554,6 @@ public: | |||
| 554 | return features.robustness2.nullDescriptor; | 554 | return features.robustness2.nullDescriptor; |
| 555 | } | 555 | } |
| 556 | 556 | ||
| 557 | bool NeedsGatherSubpixelOffset() const { | ||
| 558 | return need_gather_subpixel_offset; | ||
| 559 | } | ||
| 560 | |||
| 561 | u32 GetMaxVertexInputAttributes() const { | 557 | u32 GetMaxVertexInputAttributes() const { |
| 562 | return properties.properties.limits.maxVertexInputAttributes; | 558 | return properties.properties.limits.maxVertexInputAttributes; |
| 563 | } | 559 | } |
| @@ -668,7 +664,6 @@ private: | |||
| 668 | bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. | 664 | bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. |
| 669 | bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3. | 665 | bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3. |
| 670 | bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3. | 666 | bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3. |
| 671 | bool need_gather_subpixel_offset{}; ///< Needs offset at ImageGather for correct rounding. | ||
| 672 | u64 device_access_memory{}; ///< Total size of device local memory in bytes. | 667 | u64 device_access_memory{}; ///< Total size of device local memory in bytes. |
| 673 | u32 sets_per_pool{}; ///< Sets per Description Pool | 668 | u32 sets_per_pool{}; ///< Sets per Description Pool |
| 674 | 669 | ||