diff options
| author | 2021-10-29 17:02:57 +0200 | |
|---|---|---|
| committer | 2021-11-16 22:11:33 +0100 | |
| commit | de1c8c5c2c3131bb122351e676014cdc7c442e78 (patch) | |
| tree | ef09b2327f240c21c368a644e6450d78400d4a5d /src | |
| parent | texture_cache: Refactor Render Target scaling function (diff) | |
| download | yuzu-de1c8c5c2c3131bb122351e676014cdc7c442e78.tar.gz yuzu-de1c8c5c2c3131bb122351e676014cdc7c442e78.tar.xz yuzu-de1c8c5c2c3131bb122351e676014cdc7c442e78.zip | |
Texture Cache/Shader decompiler: Resize PointSize on rescaling, refactor and make reaper more aggressive on 4GB GPUs.
Diffstat (limited to 'src')
| -rw-r--r-- | src/shader_recompiler/ir_opt/rescaling_pass.cpp | 21 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/texture_cache/image_base.h | 1 | ||||
| -rw-r--r-- | src/video_core/texture_cache/image_info.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 18 |
5 files changed, 29 insertions, 22 deletions
diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index a5fa4ee83..81098c038 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp | |||
| @@ -75,6 +75,14 @@ void PatchFragCoord(IR::Block& block, IR::Inst& inst) { | |||
| 75 | inst.ReplaceUsesWith(downscaled_frag_coord); | 75 | inst.ReplaceUsesWith(downscaled_frag_coord); |
| 76 | } | 76 | } |
| 77 | 77 | ||
| 78 | void PatchPointSize(IR::Block& block, IR::Inst& inst) { | ||
| 79 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 80 | const IR::F32 point_value{inst.Arg(1)}; | ||
| 81 | const IR::F32 up_factor{ir.FPRecip(ir.ResolutionDownFactor())}; | ||
| 82 | const IR::F32 upscaled_point_value{ir.FPMul(point_value, up_factor)}; | ||
| 83 | inst.SetArg(1, upscaled_point_value); | ||
| 84 | } | ||
| 85 | |||
| 78 | [[nodiscard]] IR::U32 Scale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) { | 86 | [[nodiscard]] IR::U32 Scale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) { |
| 79 | IR::U32 scaled_value{value}; | 87 | IR::U32 scaled_value{value}; |
| 80 | if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) { | 88 | if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) { |
| @@ -253,6 +261,19 @@ void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) { | |||
| 253 | } | 261 | } |
| 254 | break; | 262 | break; |
| 255 | } | 263 | } |
| 264 | case IR::Opcode::SetAttribute: { | ||
| 265 | const IR::Attribute attr{inst.Arg(0).Attribute()}; | ||
| 266 | switch (attr) { | ||
| 267 | case IR::Attribute::PointSize: | ||
| 268 | if (inst.Flags<u32>() != 0xDEADBEEF) { | ||
| 269 | PatchPointSize(block, inst); | ||
| 270 | } | ||
| 271 | break; | ||
| 272 | default: | ||
| 273 | break; | ||
| 274 | } | ||
| 275 | break; | ||
| 276 | } | ||
| 256 | case IR::Opcode::ImageQueryDimensions: | 277 | case IR::Opcode::ImageQueryDimensions: |
| 257 | PatchImageQueryDimensions(block, inst); | 278 | PatchImageQueryDimensions(block, inst); |
| 258 | break; | 279 | break; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d8ac46d2a..9b516c64f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -976,8 +976,9 @@ void RasterizerOpenGL::SyncPointState() { | |||
| 976 | 976 | ||
| 977 | oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); | 977 | oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); |
| 978 | oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable); | 978 | oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable); |
| 979 | 979 | const bool is_rescaling{texture_cache.IsRescaling()}; | |
| 980 | glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); | 980 | const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; |
| 981 | glPointSize(std::max(1.0f, maxwell3d.regs.point_size * scale)); | ||
| 981 | } | 982 | } |
| 982 | 983 | ||
| 983 | void RasterizerOpenGL::SyncLineState() { | 984 | void RasterizerOpenGL::SyncLineState() { |
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 02c669766..89c111c00 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -38,7 +38,6 @@ enum class ImageFlagBits : u32 { | |||
| 38 | Rescaled = 1 << 12, | 38 | Rescaled = 1 << 12, |
| 39 | CheckingRescalable = 1 << 13, | 39 | CheckingRescalable = 1 << 13, |
| 40 | IsRescalable = 1 << 14, | 40 | IsRescalable = 1 << 14, |
| 41 | Blacklisted = 1 << 15, | ||
| 42 | }; | 41 | }; |
| 43 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) | 42 | DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) |
| 44 | 43 | ||
diff --git a/src/video_core/texture_cache/image_info.cpp b/src/video_core/texture_cache/image_info.cpp index d8e414247..015a2d33d 100644 --- a/src/video_core/texture_cache/image_info.cpp +++ b/src/video_core/texture_cache/image_info.cpp | |||
| @@ -135,7 +135,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) | |||
| 135 | type = ImageType::e3D; | 135 | type = ImageType::e3D; |
| 136 | size.depth = rt.depth; | 136 | size.depth = rt.depth; |
| 137 | } else { | 137 | } else { |
| 138 | rescaleable = block.depth == 0 && size.height > 256; | 138 | rescaleable = block.depth == 0; |
| 139 | downscaleable = size.height > 512; | 139 | downscaleable = size.height > 512; |
| 140 | type = ImageType::e2D; | 140 | type = ImageType::e2D; |
| 141 | resources.layers = rt.depth; | 141 | resources.layers = rt.depth; |
| @@ -165,7 +165,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept { | |||
| 165 | type = ImageType::e3D; | 165 | type = ImageType::e3D; |
| 166 | size.depth = regs.zeta_depth; | 166 | size.depth = regs.zeta_depth; |
| 167 | } else { | 167 | } else { |
| 168 | rescaleable = block.depth == 0 && size.height > 256; | 168 | rescaleable = block.depth == 0; |
| 169 | downscaleable = size.height > 512; | 169 | downscaleable = size.height > 512; |
| 170 | type = ImageType::e2D; | 170 | type = ImageType::e2D; |
| 171 | resources.layers = regs.zeta_depth; | 171 | resources.layers = regs.zeta_depth; |
| @@ -199,7 +199,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { | |||
| 199 | .height = config.height, | 199 | .height = config.height, |
| 200 | .depth = 1, | 200 | .depth = 1, |
| 201 | }; | 201 | }; |
| 202 | rescaleable = block.depth == 0 && size.height > 256; | 202 | rescaleable = block.depth == 0; |
| 203 | downscaleable = size.height > 512; | 203 | downscaleable = size.height > 512; |
| 204 | } | 204 | } |
| 205 | } | 205 | } |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c8031b695..aec130a32 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -53,8 +53,8 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& | |||
| 53 | const auto device_memory = runtime.GetDeviceLocalMemory(); | 53 | const auto device_memory = runtime.GetDeviceLocalMemory(); |
| 54 | const u64 possible_expected_memory = (device_memory * 4) / 10; | 54 | const u64 possible_expected_memory = (device_memory * 4) / 10; |
| 55 | const u64 possible_critical_memory = (device_memory * 7) / 10; | 55 | const u64 possible_critical_memory = (device_memory * 7) / 10; |
| 56 | expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); | 56 | expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB); |
| 57 | critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); | 57 | critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB); |
| 58 | minimum_memory = 0; | 58 | minimum_memory = 0; |
| 59 | } else { | 59 | } else { |
| 60 | // On OpenGL we can be more conservatives as the driver takes care. | 60 | // On OpenGL we can be more conservatives as the driver takes care. |
| @@ -355,7 +355,6 @@ void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table, | |||
| 355 | if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) { | 355 | if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) { |
| 356 | const ImageViewBase& image_view{slot_image_views[view.id]}; | 356 | const ImageViewBase& image_view{slot_image_views[view.id]}; |
| 357 | auto& image = slot_images[image_view.image_id]; | 357 | auto& image = slot_images[image_view.image_id]; |
| 358 | image.flags |= ImageFlagBits::Blacklisted; | ||
| 359 | has_blacklisted |= ScaleDown(image); | 358 | has_blacklisted |= ScaleDown(image); |
| 360 | image.scale_rating = 0; | 359 | image.scale_rating = 0; |
| 361 | } | 360 | } |
| @@ -985,7 +984,6 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 985 | 984 | ||
| 986 | bool can_rescale = info.rescaleable; | 985 | bool can_rescale = info.rescaleable; |
| 987 | bool any_rescaled = false; | 986 | bool any_rescaled = false; |
| 988 | bool any_blacklisted = false; | ||
| 989 | for (const ImageId sibling_id : all_siblings) { | 987 | for (const ImageId sibling_id : all_siblings) { |
| 990 | if (!can_rescale) { | 988 | if (!can_rescale) { |
| 991 | break; | 989 | break; |
| @@ -993,7 +991,6 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 993 | Image& sibling = slot_images[sibling_id]; | 991 | Image& sibling = slot_images[sibling_id]; |
| 994 | can_rescale &= ImageCanRescale(sibling); | 992 | can_rescale &= ImageCanRescale(sibling); |
| 995 | any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled); | 993 | any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled); |
| 996 | any_blacklisted |= True(sibling.flags & ImageFlagBits::Blacklisted); | ||
| 997 | } | 994 | } |
| 998 | 995 | ||
| 999 | can_rescale &= any_rescaled; | 996 | can_rescale &= any_rescaled; |
| @@ -1007,9 +1004,6 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | |||
| 1007 | for (const ImageId sibling_id : all_siblings) { | 1004 | for (const ImageId sibling_id : all_siblings) { |
| 1008 | Image& sibling = slot_images[sibling_id]; | 1005 | Image& sibling = slot_images[sibling_id]; |
| 1009 | ScaleDown(sibling); | 1006 | ScaleDown(sibling); |
| 1010 | if (any_blacklisted) { | ||
| 1011 | sibling.flags |= ImageFlagBits::Blacklisted; | ||
| 1012 | } | ||
| 1013 | } | 1007 | } |
| 1014 | } | 1008 | } |
| 1015 | 1009 | ||
| @@ -1644,7 +1638,6 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) { | |||
| 1644 | boost::container::small_vector<const AliasedImage*, 1> aliased_images; | 1638 | boost::container::small_vector<const AliasedImage*, 1> aliased_images; |
| 1645 | Image& image = slot_images[image_id]; | 1639 | Image& image = slot_images[image_id]; |
| 1646 | bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); | 1640 | bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); |
| 1647 | bool any_blacklisted = True(image.flags & ImageFlagBits::Blacklisted); | ||
| 1648 | u64 most_recent_tick = image.modification_tick; | 1641 | u64 most_recent_tick = image.modification_tick; |
| 1649 | for (const AliasedImage& aliased : image.aliased_images) { | 1642 | for (const AliasedImage& aliased : image.aliased_images) { |
| 1650 | ImageBase& aliased_image = slot_images[aliased.id]; | 1643 | ImageBase& aliased_image = slot_images[aliased.id]; |
| @@ -1652,7 +1645,6 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) { | |||
| 1652 | most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); | 1645 | most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); |
| 1653 | aliased_images.push_back(&aliased); | 1646 | aliased_images.push_back(&aliased); |
| 1654 | any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); | 1647 | any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); |
| 1655 | any_blacklisted |= True(aliased_image.flags & ImageFlagBits::Blacklisted); | ||
| 1656 | } | 1648 | } |
| 1657 | } | 1649 | } |
| 1658 | if (aliased_images.empty()) { | 1650 | if (aliased_images.empty()) { |
| @@ -1664,9 +1656,6 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) { | |||
| 1664 | ScaleUp(image); | 1656 | ScaleUp(image); |
| 1665 | } else { | 1657 | } else { |
| 1666 | ScaleDown(image); | 1658 | ScaleDown(image); |
| 1667 | if (any_blacklisted) { | ||
| 1668 | image.flags |= ImageFlagBits::Blacklisted; | ||
| 1669 | } | ||
| 1670 | } | 1659 | } |
| 1671 | } | 1660 | } |
| 1672 | image.modification_tick = most_recent_tick; | 1661 | image.modification_tick = most_recent_tick; |
| @@ -1684,9 +1673,6 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) { | |||
| 1684 | Image& aliased_image = slot_images[aliased->id]; | 1673 | Image& aliased_image = slot_images[aliased->id]; |
| 1685 | if (!can_rescale) { | 1674 | if (!can_rescale) { |
| 1686 | ScaleDown(aliased_image); | 1675 | ScaleDown(aliased_image); |
| 1687 | if (any_blacklisted) { | ||
| 1688 | aliased_image.flags |= ImageFlagBits::Blacklisted; | ||
| 1689 | } | ||
| 1690 | CopyImage(image_id, aliased->id, aliased->copies); | 1676 | CopyImage(image_id, aliased->id, aliased->copies); |
| 1691 | continue; | 1677 | continue; |
| 1692 | } | 1678 | } |