diff options
| author | 2023-04-14 16:56:34 -0700 | |
|---|---|---|
| committer | 2023-04-14 16:56:34 -0700 | |
| commit | e0895a85810d76d810b40ade50dc514a459b685e (patch) | |
| tree | cf3d44618ee0757f4994917066ba48c3b8151ac6 /src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | |
| parent | Merge pull request #10055 from v1993/patch-1 (diff) | |
| parent | video_core: Enable ImageGather rounding fix on AMD open source drivers (diff) | |
| download | yuzu-e0895a85810d76d810b40ade50dc514a459b685e.tar.gz yuzu-e0895a85810d76d810b40ade50dc514a459b685e.tar.xz yuzu-e0895a85810d76d810b40ade50dc514a459b685e.zip | |
Merge pull request #10030 from Wollnashorn/botw-amd-fix
shader_recompiler: Fix ImageGather rounding on AMD/Intel
Diffstat (limited to 'src/shader_recompiler/backend/spirv/emit_spirv_image.cpp')
| -rw-r--r-- | src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 30 |
1 files changed, 30 insertions, 0 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 02073c420..7d901c04b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | |||
| @@ -261,6 +261,30 @@ Id BitTest(EmitContext& ctx, Id mask, Id bit) { | |||
| 261 | const Id bit_value{ctx.OpBitwiseAnd(ctx.U32[1], shifted, ctx.Const(1u))}; | 261 | const Id bit_value{ctx.OpBitwiseAnd(ctx.U32[1], shifted, ctx.Const(1u))}; |
| 262 | return ctx.OpINotEqual(ctx.U1, bit_value, ctx.u32_zero_value); | 262 | return ctx.OpINotEqual(ctx.U1, bit_value, ctx.u32_zero_value); |
| 263 | } | 263 | } |
| 264 | |||
| 265 | Id ImageGatherSubpixelOffset(EmitContext& ctx, const IR::TextureInstInfo& info, Id texture, | ||
| 266 | Id coords) { | ||
| 267 | // Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on | ||
| 268 | // AMD hardware as on Maxwell or other Nvidia architectures. | ||
| 269 | const auto calculate_coords{[&](size_t dim) { | ||
| 270 | const Id nudge{ctx.Const(0x1p-9f)}; | ||
| 271 | const Id image_size{ctx.OpImageQuerySizeLod(ctx.U32[dim], texture, ctx.u32_zero_value)}; | ||
| 272 | Id offset{dim == 2 ? ctx.ConstantComposite(ctx.F32[dim], nudge, nudge) | ||
| 273 | : ctx.ConstantComposite(ctx.F32[dim], nudge, nudge, ctx.f32_zero_value)}; | ||
| 274 | offset = ctx.OpFDiv(ctx.F32[dim], offset, ctx.OpConvertUToF(ctx.F32[dim], image_size)); | ||
| 275 | return ctx.OpFAdd(ctx.F32[dim], coords, offset); | ||
| 276 | }}; | ||
| 277 | switch (info.type) { | ||
| 278 | case TextureType::Color2D: | ||
| 279 | case TextureType::Color2DRect: | ||
| 280 | return calculate_coords(2); | ||
| 281 | case TextureType::ColorArray2D: | ||
| 282 | case TextureType::ColorCube: | ||
| 283 | return calculate_coords(3); | ||
| 284 | default: | ||
| 285 | return coords; | ||
| 286 | } | ||
| 287 | } | ||
| 264 | } // Anonymous namespace | 288 | } // Anonymous namespace |
| 265 | 289 | ||
| 266 | Id EmitBindlessImageSampleImplicitLod(EmitContext&) { | 290 | Id EmitBindlessImageSampleImplicitLod(EmitContext&) { |
| @@ -423,6 +447,9 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id | |||
| 423 | const IR::Value& offset, const IR::Value& offset2) { | 447 | const IR::Value& offset, const IR::Value& offset2) { |
| 424 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | 448 | const auto info{inst->Flags<IR::TextureInstInfo>()}; |
| 425 | const ImageOperands operands(ctx, offset, offset2); | 449 | const ImageOperands operands(ctx, offset, offset2); |
| 450 | if (ctx.profile.need_gather_subpixel_offset) { | ||
| 451 | coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords); | ||
| 452 | } | ||
| 426 | return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, | 453 | return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, |
| 427 | ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), | 454 | ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), |
| 428 | operands.MaskOptional(), operands.Span()); | 455 | operands.MaskOptional(), operands.Span()); |
| @@ -432,6 +459,9 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | |||
| 432 | const IR::Value& offset, const IR::Value& offset2, Id dref) { | 459 | const IR::Value& offset, const IR::Value& offset2, Id dref) { |
| 433 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | 460 | const auto info{inst->Flags<IR::TextureInstInfo>()}; |
| 434 | const ImageOperands operands(ctx, offset, offset2); | 461 | const ImageOperands operands(ctx, offset, offset2); |
| 462 | if (ctx.profile.need_gather_subpixel_offset) { | ||
| 463 | coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords); | ||
| 464 | } | ||
| 435 | return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, | 465 | return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, |
| 436 | ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(), | 466 | ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(), |
| 437 | operands.Span()); | 467 | operands.Span()); |