diff options
| author | 2023-04-14 16:56:34 -0700 | |
|---|---|---|
| committer | 2023-04-14 16:56:34 -0700 | |
| commit | e0895a85810d76d810b40ade50dc514a459b685e (patch) | |
| tree | cf3d44618ee0757f4994917066ba48c3b8151ac6 /src | |
| parent | Merge pull request #10055 from v1993/patch-1 (diff) | |
| parent | video_core: Enable ImageGather rounding fix on AMD open source drivers (diff) | |
| download | yuzu-e0895a85810d76d810b40ade50dc514a459b685e.tar.gz yuzu-e0895a85810d76d810b40ade50dc514a459b685e.tar.xz yuzu-e0895a85810d76d810b40ade50dc514a459b685e.zip | |
Merge pull request #10030 from Wollnashorn/botw-amd-fix
shader_recompiler: Fix ImageGather rounding on AMD/Intel
Diffstat (limited to 'src')
| -rw-r--r-- | src/shader_recompiler/backend/glsl/emit_glsl_image.cpp | 29 | ||||
| -rw-r--r-- | src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 30 | ||||
| -rw-r--r-- | src/shader_recompiler/profile.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 5 |
6 files changed, 73 insertions, 0 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index f335c8af0..418505475 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp | |||
| @@ -143,6 +143,21 @@ IR::Inst* PrepareSparse(IR::Inst& inst) { | |||
| 143 | } | 143 | } |
| 144 | return sparse_inst; | 144 | return sparse_inst; |
| 145 | } | 145 | } |
| 146 | |||
// Builds the GLSL coordinate expression for a gather with a small subpixel nudge added.
//
// info    - texture instruction info; only info.type is read here.
// texture - GLSL expression naming the sampler; spliced into textureSize(<texture>, 0).
// coords  - GLSL expression for the original gather coordinates.
//
// Returns, depending on info.type:
//  - Color2D / Color2DRect:   "<coords>+vec2(0.001953125)/vec2(textureSize(<texture>, 0))"
//  - ColorArray2D / ColorCube: a vec3 whose .xy gets the same nudge and whose .z is passed
//    through unchanged.
//  - anything else:            a copy of coords, unmodified.
//
// 0.001953125 is 1/512; dividing by the level-0 texture size scales it to 1/512 of a texel.
// NOTE(review): for ColorCube the coordinate is presumably a direction vector, so an .xy nudge
// scaled by textureSize may not correspond to a 1/512 texel shift - confirm.
| 147 | std::string ImageGatherSubpixelOffset(const IR::TextureInstInfo& info, std::string_view texture, | ||
| 148 | std::string_view coords) { | ||
| 149 | switch (info.type) { | ||
| 150 | case TextureType::Color2D: | ||
| 151 | case TextureType::Color2DRect: | ||
| 152 | return fmt::format("{}+vec2(0.001953125)/vec2(textureSize({}, 0))", coords, texture); | ||
| 153 | case TextureType::ColorArray2D: | ||
| 154 | case TextureType::ColorCube: | ||
| 155 | return fmt::format("vec3({0}.xy+vec2(0.001953125)/vec2(textureSize({1}, 0)),{0}.z)", coords, | ||
| 156 | texture); | ||
| 157 | default: | ||
| 158 | return std::string{coords}; | ||
| 159 | } | ||
| 160 | } | ||
| 146 | } // Anonymous namespace | 161 | } // Anonymous namespace |
| 147 | 162 | ||
| 148 | void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | 163 | void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, |
| @@ -340,6 +355,13 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | |||
| 340 | LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); | 355 | LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); |
| 341 | ctx.AddU1("{}=true;", *sparse_inst); | 356 | ctx.AddU1("{}=true;", *sparse_inst); |
| 342 | } | 357 | } |
| 358 | std::string coords_with_subpixel_offset; | ||
| 359 | if (ctx.profile.need_gather_subpixel_offset) { | ||
| 360 | // Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on | ||
| 361 | // AMD hardware as on Maxwell or other Nvidia architectures. | ||
| 362 | coords_with_subpixel_offset = ImageGatherSubpixelOffset(info, texture, coords); | ||
| 363 | coords = coords_with_subpixel_offset; | ||
| 364 | } | ||
| 343 | if (!sparse_inst || !supports_sparse) { | 365 | if (!sparse_inst || !supports_sparse) { |
| 344 | if (offset.IsEmpty()) { | 366 | if (offset.IsEmpty()) { |
| 345 | ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords, | 367 | ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords, |
| @@ -387,6 +409,13 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde | |||
| 387 | LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); | 409 | LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); |
| 388 | ctx.AddU1("{}=true;", *sparse_inst); | 410 | ctx.AddU1("{}=true;", *sparse_inst); |
| 389 | } | 411 | } |
| 412 | std::string coords_with_subpixel_offset; | ||
| 413 | if (ctx.profile.need_gather_subpixel_offset) { | ||
| 414 | // Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on | ||
| 415 | // AMD hardware as on Maxwell or other Nvidia architectures. | ||
| 416 | coords_with_subpixel_offset = ImageGatherSubpixelOffset(info, texture, coords); | ||
| 417 | coords = coords_with_subpixel_offset; | ||
| 418 | } | ||
| 390 | if (!sparse_inst || !supports_sparse) { | 419 | if (!sparse_inst || !supports_sparse) { |
| 391 | if (offset.IsEmpty()) { | 420 | if (offset.IsEmpty()) { |
| 392 | ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref); | 421 | ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 02073c420..7d901c04b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | |||
| @@ -261,6 +261,30 @@ Id BitTest(EmitContext& ctx, Id mask, Id bit) { | |||
| 261 | const Id bit_value{ctx.OpBitwiseAnd(ctx.U32[1], shifted, ctx.Const(1u))}; | 261 | const Id bit_value{ctx.OpBitwiseAnd(ctx.U32[1], shifted, ctx.Const(1u))}; |
| 262 | return ctx.OpINotEqual(ctx.U1, bit_value, ctx.u32_zero_value); | 262 | return ctx.OpINotEqual(ctx.U1, bit_value, ctx.u32_zero_value); |
| 263 | } | 263 | } |
| 264 | |||
// Emits SPIR-V that adds a subpixel nudge to gather coordinates and returns the new coords Id.
//
// ctx     - emit context used to create constants and instructions.
// info    - texture instruction info; only info.type is read here.
// texture - image Id passed to OpImageQuerySizeLod (queried at LOD ctx.u32_zero_value).
// coords  - Id of the original coordinate vector.
//
// The inner lambda takes the vector width `dim` (2 or 3) and computes
//   coords + offset / float(image_size)
// where offset is (nudge, nudge) for dim == 2 and (nudge, nudge, ctx.f32_zero_value) otherwise,
// so only x and y receive a nudge. nudge is 0x1p-9f, i.e. 2^-9 = 1/512.
// Color2D / Color2DRect use dim 2, ColorArray2D / ColorCube use dim 3; every other texture
// type returns coords unchanged and emits nothing.
//
// NOTE(review): the SPIR-V spec gives OpImageQuerySizeLod a 2-component result for Cube images
// (3 only when arrayed, or for 2D-arrayed/3D). The ColorCube case requests ctx.U32[3] here -
// confirm how ColorCube images are declared, otherwise this may produce invalid SPIR-V.
// NOTE(review): in the dim 3 case the z offset is 0 / image_size.z; this presumably relies on
// image_size.z being non-zero - confirm.
| 265 | Id ImageGatherSubpixelOffset(EmitContext& ctx, const IR::TextureInstInfo& info, Id texture, | ||
| 266 | Id coords) { | ||
| 267 | // Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on | ||
| 268 | // AMD hardware as on Maxwell or other Nvidia architectures. | ||
| 269 | const auto calculate_coords{[&](size_t dim) { | ||
| 270 | const Id nudge{ctx.Const(0x1p-9f)}; | ||
| 271 | const Id image_size{ctx.OpImageQuerySizeLod(ctx.U32[dim], texture, ctx.u32_zero_value)}; | ||
| 272 | Id offset{dim == 2 ? ctx.ConstantComposite(ctx.F32[dim], nudge, nudge) | ||
| 273 | : ctx.ConstantComposite(ctx.F32[dim], nudge, nudge, ctx.f32_zero_value)}; | ||
| 274 | offset = ctx.OpFDiv(ctx.F32[dim], offset, ctx.OpConvertUToF(ctx.F32[dim], image_size)); | ||
| 275 | return ctx.OpFAdd(ctx.F32[dim], coords, offset); | ||
| 276 | }}; | ||
| 277 | switch (info.type) { | ||
| 278 | case TextureType::Color2D: | ||
| 279 | case TextureType::Color2DRect: | ||
| 280 | return calculate_coords(2); | ||
| 281 | case TextureType::ColorArray2D: | ||
| 282 | case TextureType::ColorCube: | ||
| 283 | return calculate_coords(3); | ||
| 284 | default: | ||
| 285 | return coords; | ||
| 286 | } | ||
| 287 | } | ||
| 264 | } // Anonymous namespace | 288 | } // Anonymous namespace |
| 265 | 289 | ||
| 266 | Id EmitBindlessImageSampleImplicitLod(EmitContext&) { | 290 | Id EmitBindlessImageSampleImplicitLod(EmitContext&) { |
| @@ -423,6 +447,9 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id | |||
| 423 | const IR::Value& offset, const IR::Value& offset2) { | 447 | const IR::Value& offset, const IR::Value& offset2) { |
| 424 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | 448 | const auto info{inst->Flags<IR::TextureInstInfo>()}; |
| 425 | const ImageOperands operands(ctx, offset, offset2); | 449 | const ImageOperands operands(ctx, offset, offset2); |
| 450 | if (ctx.profile.need_gather_subpixel_offset) { | ||
| 451 | coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords); | ||
| 452 | } | ||
| 426 | return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, | 453 | return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, |
| 427 | ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), | 454 | ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), |
| 428 | operands.MaskOptional(), operands.Span()); | 455 | operands.MaskOptional(), operands.Span()); |
| @@ -432,6 +459,9 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | |||
| 432 | const IR::Value& offset, const IR::Value& offset2, Id dref) { | 459 | const IR::Value& offset, const IR::Value& offset2, Id dref) { |
| 433 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | 460 | const auto info{inst->Flags<IR::TextureInstInfo>()}; |
| 434 | const ImageOperands operands(ctx, offset, offset2); | 461 | const ImageOperands operands(ctx, offset, offset2); |
| 462 | if (ctx.profile.need_gather_subpixel_offset) { | ||
| 463 | coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords); | ||
| 464 | } | ||
| 435 | return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, | 465 | return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, |
| 436 | ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(), | 466 | ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(), |
| 437 | operands.Span()); | 467 | operands.Span()); |
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 253e0d0bd..9f88fb440 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h | |||
| @@ -52,6 +52,10 @@ struct Profile { | |||
| 52 | bool need_declared_frag_colors{}; | 52 | bool need_declared_frag_colors{}; |
| 53 | /// Prevents fast math optimizations that may cause inaccuracies | 53 | /// Prevents fast math optimizations that may cause inaccuracies |
| 54 | bool need_fastmath_off{}; | 54 | bool need_fastmath_off{}; |
| 55 | /// Some GPU vendors use a different rounding precision when calculating texture pixel | ||
| 56 | /// coordinates with the 16.8 format in the ImageGather instruction than the Maxwell | ||
| 57 | /// architecture. Applying an offset does fix this mismatching rounding behaviour. | ||
| 58 | bool need_gather_subpixel_offset{}; | ||
| 55 | 59 | ||
| 56 | /// OpFClamp is broken and OpFMax + OpFMin should be used instead | 60 | /// OpFClamp is broken and OpFMax + OpFMin should be used instead |
| 57 | bool has_broken_spirv_clamp{}; | 61 | bool has_broken_spirv_clamp{}; |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 3ff8cad83..cc0b95f1a 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -176,6 +176,10 @@ public: | |||
| 176 | return vendor_name == "ATI Technologies Inc."; | 176 | return vendor_name == "ATI Technologies Inc."; |
| 177 | } | 177 | } |
| 178 | 178 | ||
// Returns true when vendor_name compares exactly equal to "Intel".
// NOTE(review): this is an exact string match; a driver reporting any other Intel vendor
// string would not be detected - confirm which strings the supported drivers report.
| 179 | bool IsIntel() const { | ||
| 180 | return vendor_name == "Intel"; | ||
| 181 | } | ||
| 182 | |||
| 179 | bool CanReportMemoryUsage() const { | 183 | bool CanReportMemoryUsage() const { |
| 180 | return can_report_memory; | 184 | return can_report_memory; |
| 181 | } | 185 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 479bb8ba3..6ecda2984 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -218,6 +218,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo | |||
| 218 | .lower_left_origin_mode = true, | 218 | .lower_left_origin_mode = true, |
| 219 | .need_declared_frag_colors = true, | 219 | .need_declared_frag_colors = true, |
| 220 | .need_fastmath_off = device.NeedsFastmathOff(), | 220 | .need_fastmath_off = device.NeedsFastmathOff(), |
| 221 | .need_gather_subpixel_offset = device.IsAmd() || device.IsIntel(), | ||
| 221 | 222 | ||
| 222 | .has_broken_spirv_clamp = true, | 223 | .has_broken_spirv_clamp = true, |
| 223 | .has_broken_unsigned_image_offsets = true, | 224 | .has_broken_unsigned_image_offsets = true, |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 0684cceed..985cc3203 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -329,6 +329,11 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | |||
| 329 | 329 | ||
| 330 | .lower_left_origin_mode = false, | 330 | .lower_left_origin_mode = false, |
| 331 | .need_declared_frag_colors = false, | 331 | .need_declared_frag_colors = false, |
| 332 | .need_gather_subpixel_offset = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || | ||
| 333 | driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE || | ||
| 334 | driver_id == VK_DRIVER_ID_MESA_RADV || | ||
| 335 | driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS || | ||
| 336 | driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA, | ||
| 332 | 337 | ||
| 333 | .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS, | 338 | .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS, |
| 334 | .has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY, | 339 | .has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY, |