diff options
Diffstat (limited to 'src/shader_recompiler')
| -rw-r--r-- | src/shader_recompiler/backend/glsl/emit_glsl_image.cpp | 29 | ||||
| -rw-r--r-- | src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 39 | ||||
| -rw-r--r-- | src/shader_recompiler/profile.h | 4 |
3 files changed, 72 insertions, 0 deletions
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index f335c8af0..418505475 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp | |||
| @@ -143,6 +143,21 @@ IR::Inst* PrepareSparse(IR::Inst& inst) { | |||
| 143 | } | 143 | } |
| 144 | return sparse_inst; | 144 | return sparse_inst; |
| 145 | } | 145 | } |
| 146 | |||
| 147 | std::string ImageGatherSubpixelOffset(const IR::TextureInstInfo& info, std::string_view texture, | ||
| 148 | std::string_view coords) { | ||
| 149 | switch (info.type) { | ||
| 150 | case TextureType::Color2D: | ||
| 151 | case TextureType::Color2DRect: | ||
| 152 | return fmt::format("{}+vec2(0.001953125)/vec2(textureSize({}, 0))", coords, texture); | ||
| 153 | case TextureType::ColorArray2D: | ||
| 154 | case TextureType::ColorCube: | ||
| 155 | return fmt::format("vec3({0}.xy+vec2(0.001953125)/vec2(textureSize({1}, 0)),{0}.z)", coords, | ||
| 156 | texture); | ||
| 157 | default: | ||
| 158 | return std::string{coords}; | ||
| 159 | } | ||
| 160 | } | ||
| 146 | } // Anonymous namespace | 161 | } // Anonymous namespace |
| 147 | 162 | ||
| 148 | void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | 163 | void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, |
| @@ -340,6 +355,13 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | |||
| 340 | LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); | 355 | LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); |
| 341 | ctx.AddU1("{}=true;", *sparse_inst); | 356 | ctx.AddU1("{}=true;", *sparse_inst); |
| 342 | } | 357 | } |
| 358 | std::string coords_with_subpixel_offset; | ||
| 359 | if (ctx.profile.need_gather_subpixel_offset) { | ||
| 360 | // Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on | ||
| 361 | // AMD hardware as on Maxwell or other Nvidia architectures. | ||
| 362 | coords_with_subpixel_offset = ImageGatherSubpixelOffset(info, texture, coords); | ||
| 363 | coords = coords_with_subpixel_offset; | ||
| 364 | } | ||
| 343 | if (!sparse_inst || !supports_sparse) { | 365 | if (!sparse_inst || !supports_sparse) { |
| 344 | if (offset.IsEmpty()) { | 366 | if (offset.IsEmpty()) { |
| 345 | ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords, | 367 | ctx.Add("{}=textureGather({},{},int({}));", texel, texture, coords, |
| @@ -387,6 +409,13 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde | |||
| 387 | LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); | 409 | LOG_WARNING(Shader_GLSL, "Device does not support sparse texture queries. STUBBING"); |
| 388 | ctx.AddU1("{}=true;", *sparse_inst); | 410 | ctx.AddU1("{}=true;", *sparse_inst); |
| 389 | } | 411 | } |
| 412 | std::string coords_with_subpixel_offset; | ||
| 413 | if (ctx.profile.need_gather_subpixel_offset) { | ||
| 414 | // Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on | ||
| 415 | // AMD hardware as on Maxwell or other Nvidia architectures. | ||
| 416 | coords_with_subpixel_offset = ImageGatherSubpixelOffset(info, texture, coords); | ||
| 417 | coords = coords_with_subpixel_offset; | ||
| 418 | } | ||
| 390 | if (!sparse_inst || !supports_sparse) { | 419 | if (!sparse_inst || !supports_sparse) { |
| 391 | if (offset.IsEmpty()) { | 420 | if (offset.IsEmpty()) { |
| 392 | ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref); | 421 | ctx.Add("{}=textureGather({},{},{});", texel, texture, coords, dref); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 02073c420..968901d42 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | |||
| @@ -261,6 +261,39 @@ Id BitTest(EmitContext& ctx, Id mask, Id bit) { | |||
| 261 | const Id bit_value{ctx.OpBitwiseAnd(ctx.U32[1], shifted, ctx.Const(1u))}; | 261 | const Id bit_value{ctx.OpBitwiseAnd(ctx.U32[1], shifted, ctx.Const(1u))}; |
| 262 | return ctx.OpINotEqual(ctx.U1, bit_value, ctx.u32_zero_value); | 262 | return ctx.OpINotEqual(ctx.U1, bit_value, ctx.u32_zero_value); |
| 263 | } | 263 | } |
| 264 | |||
| 265 | Id ImageGatherSubpixelOffset(EmitContext& ctx, const IR::TextureInstInfo& info, Id texture, | ||
| 266 | Id coords) { | ||
| 267 | // Apply a subpixel offset of 1/512 the texel size of the texture to ensure same rounding on | ||
| 268 | // AMD hardware as on Maxwell or other Nvidia architectures. | ||
| 269 | const auto calculate_offset{[&](size_t dim) -> std::array<Id, 2> { | ||
| 270 | const Id nudge{ctx.Const(0x1p-9f)}; | ||
| 271 | const Id image_size{ctx.OpImageQuerySizeLod(ctx.U32[dim], texture, ctx.u32_zero_value)}; | ||
| 272 | const Id offset_x{ctx.OpFDiv( | ||
| 273 | ctx.F32[1], nudge, | ||
| 274 | ctx.OpConvertUToF(ctx.F32[1], ctx.OpCompositeExtract(ctx.U32[1], image_size, 0)))}; | ||
| 275 | const Id offset_y{ctx.OpFDiv( | ||
| 276 | ctx.F32[1], nudge, | ||
| 277 | ctx.OpConvertUToF(ctx.F32[1], ctx.OpCompositeExtract(ctx.U32[1], image_size, 1)))}; | ||
| 278 | return {ctx.OpFAdd(ctx.F32[1], ctx.OpCompositeExtract(ctx.F32[1], coords, 0), offset_x), | ||
| 279 | ctx.OpFAdd(ctx.F32[1], ctx.OpCompositeExtract(ctx.F32[1], coords, 1), offset_y)}; | ||
| 280 | }}; | ||
| 281 | switch (info.type) { | ||
| 282 | case TextureType::Color2D: | ||
| 283 | case TextureType::Color2DRect: { | ||
| 284 | const auto offset{calculate_offset(2)}; | ||
| 285 | return ctx.OpCompositeConstruct(ctx.F32[2], offset[0], offset[1]); | ||
| 286 | } | ||
| 287 | case TextureType::ColorArray2D: | ||
| 288 | case TextureType::ColorCube: { | ||
| 289 | const auto offset{calculate_offset(3)}; | ||
| 290 | return ctx.OpCompositeConstruct(ctx.F32[3], offset[0], offset[1], | ||
| 291 | ctx.OpCompositeExtract(ctx.F32[1], coords, 2)); | ||
| 292 | } | ||
| 293 | default: | ||
| 294 | return coords; | ||
| 295 | } | ||
| 296 | } | ||
| 264 | } // Anonymous namespace | 297 | } // Anonymous namespace |
| 265 | 298 | ||
| 266 | Id EmitBindlessImageSampleImplicitLod(EmitContext&) { | 299 | Id EmitBindlessImageSampleImplicitLod(EmitContext&) { |
| @@ -423,6 +456,9 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id | |||
| 423 | const IR::Value& offset, const IR::Value& offset2) { | 456 | const IR::Value& offset, const IR::Value& offset2) { |
| 424 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | 457 | const auto info{inst->Flags<IR::TextureInstInfo>()}; |
| 425 | const ImageOperands operands(ctx, offset, offset2); | 458 | const ImageOperands operands(ctx, offset, offset2); |
| 459 | if (ctx.profile.need_gather_subpixel_offset) { | ||
| 460 | coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords); | ||
| 461 | } | ||
| 426 | return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, | 462 | return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, |
| 427 | ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), | 463 | ctx.F32[4], Texture(ctx, info, index), coords, ctx.Const(info.gather_component), |
| 428 | operands.MaskOptional(), operands.Span()); | 464 | operands.MaskOptional(), operands.Span()); |
| @@ -432,6 +468,9 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, | |||
| 432 | const IR::Value& offset, const IR::Value& offset2, Id dref) { | 468 | const IR::Value& offset, const IR::Value& offset2, Id dref) { |
| 433 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | 469 | const auto info{inst->Flags<IR::TextureInstInfo>()}; |
| 434 | const ImageOperands operands(ctx, offset, offset2); | 470 | const ImageOperands operands(ctx, offset, offset2); |
| 471 | if (ctx.profile.need_gather_subpixel_offset) { | ||
| 472 | coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords); | ||
| 473 | } | ||
| 435 | return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, | 474 | return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, |
| 436 | ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(), | 475 | ctx.F32[4], Texture(ctx, info, index), coords, dref, operands.MaskOptional(), |
| 437 | operands.Span()); | 476 | operands.Span()); |
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 253e0d0bd..31390e869 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h | |||
| @@ -52,6 +52,10 @@ struct Profile { | |||
| 52 | bool need_declared_frag_colors{}; | 52 | bool need_declared_frag_colors{}; |
| 53 | /// Prevents fast math optimizations that may cause inaccuracies | 53 | /// Prevents fast math optimizations that may cause inaccuracies |
| 54 | bool need_fastmath_off{}; | 54 | bool need_fastmath_off{}; |
| 55 | /// Some GPU vendors use a lower fixed point format of 16.8 when calculating pixel coordinates | ||
| 56 | /// in the ImageGather instruction than the Maxwell architecture does. Applying an offset does | ||
| 57 | /// fix this mismatched rounding behaviour. | ||
| 58 | bool need_gather_subpixel_offset{}; | ||
| 55 | 59 | ||
| 56 | /// OpFClamp is broken and OpFMax + OpFMin should be used instead | 60 | /// OpFClamp is broken and OpFMax + OpFMin should be used instead |
| 57 | bool has_broken_spirv_clamp{}; | 61 | bool has_broken_spirv_clamp{}; |