diff options
Diffstat (limited to 'src')
62 files changed, 4195 insertions, 3269 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 198b3fe07..2554add28 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -44,6 +44,7 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 44 | "${VIDEO_CORE}/shader/decode/half_set.cpp" | 44 | "${VIDEO_CORE}/shader/decode/half_set.cpp" |
| 45 | "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" | 45 | "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp" |
| 46 | "${VIDEO_CORE}/shader/decode/hfma2.cpp" | 46 | "${VIDEO_CORE}/shader/decode/hfma2.cpp" |
| 47 | "${VIDEO_CORE}/shader/decode/image.cpp" | ||
| 47 | "${VIDEO_CORE}/shader/decode/integer_set.cpp" | 48 | "${VIDEO_CORE}/shader/decode/integer_set.cpp" |
| 48 | "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" | 49 | "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" |
| 49 | "${VIDEO_CORE}/shader/decode/memory.cpp" | 50 | "${VIDEO_CORE}/shader/decode/memory.cpp" |
| @@ -74,6 +75,7 @@ add_library(common STATIC | |||
| 74 | assert.h | 75 | assert.h |
| 75 | detached_tasks.cpp | 76 | detached_tasks.cpp |
| 76 | detached_tasks.h | 77 | detached_tasks.h |
| 78 | binary_find.h | ||
| 77 | bit_field.h | 79 | bit_field.h |
| 78 | bit_util.h | 80 | bit_util.h |
| 79 | cityhash.cpp | 81 | cityhash.cpp |
diff --git a/src/common/alignment.h b/src/common/alignment.h index d94a2291f..617b14d9b 100644 --- a/src/common/alignment.h +++ b/src/common/alignment.h | |||
| @@ -20,6 +20,12 @@ constexpr T AlignDown(T value, std::size_t size) { | |||
| 20 | } | 20 | } |
| 21 | 21 | ||
| 22 | template <typename T> | 22 | template <typename T> |
| 23 | constexpr T AlignBits(T value, std::size_t align) { | ||
| 24 | static_assert(std::is_unsigned_v<T>, "T must be an unsigned value."); | ||
| 25 | return static_cast<T>((value + ((1ULL << align) - 1)) >> align << align); | ||
| 26 | } | ||
| 27 | |||
| 28 | template <typename T> | ||
| 23 | constexpr bool Is4KBAligned(T value) { | 29 | constexpr bool Is4KBAligned(T value) { |
| 24 | static_assert(std::is_unsigned_v<T>, "T must be an unsigned value."); | 30 | static_assert(std::is_unsigned_v<T>, "T must be an unsigned value."); |
| 25 | return (value & 0xFFF) == 0; | 31 | return (value & 0xFFF) == 0; |
diff --git a/src/common/binary_find.h b/src/common/binary_find.h new file mode 100644 index 000000000..5cc523bf9 --- /dev/null +++ b/src/common/binary_find.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | // Copyright 2019 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | |||
| 9 | namespace Common { | ||
| 10 | |||
| 11 | template <class ForwardIt, class T, class Compare = std::less<>> | ||
| 12 | ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) { | ||
| 13 | // Note: BOTH type T and the type after ForwardIt is dereferenced | ||
| 14 | // must be implicitly convertible to BOTH Type1 and Type2, used in Compare. | ||
| 15 | // This is stricter than lower_bound requirement (see above) | ||
| 16 | |||
| 17 | first = std::lower_bound(first, last, value, comp); | ||
| 18 | return first != last && !comp(value, *first) ? first : last; | ||
| 19 | } | ||
| 20 | |||
| 21 | } // namespace Common | ||
diff --git a/src/common/bit_util.h b/src/common/bit_util.h index d032df413..6f7d5a947 100644 --- a/src/common/bit_util.h +++ b/src/common/bit_util.h | |||
| @@ -97,4 +97,48 @@ inline u32 CountTrailingZeroes64(u64 value) { | |||
| 97 | } | 97 | } |
| 98 | #endif | 98 | #endif |
| 99 | 99 | ||
| 100 | #ifdef _MSC_VER | ||
| 101 | |||
| 102 | inline u32 MostSignificantBit32(const u32 value) { | ||
| 103 | unsigned long result; | ||
| 104 | _BitScanReverse(&result, value); | ||
| 105 | return static_cast<u32>(result); | ||
| 106 | } | ||
| 107 | |||
| 108 | inline u32 MostSignificantBit64(const u64 value) { | ||
| 109 | unsigned long result; | ||
| 110 | _BitScanReverse64(&result, value); | ||
| 111 | return static_cast<u32>(result); | ||
| 112 | } | ||
| 113 | |||
| 114 | #else | ||
| 115 | |||
| 116 | inline u32 MostSignificantBit32(const u32 value) { | ||
| 117 | return 31U - static_cast<u32>(__builtin_clz(value)); | ||
| 118 | } | ||
| 119 | |||
| 120 | inline u32 MostSignificantBit64(const u64 value) { | ||
| 121 | return 63U - static_cast<u32>(__builtin_clzll(value)); | ||
| 122 | } | ||
| 123 | |||
| 124 | #endif | ||
| 125 | |||
| 126 | inline u32 Log2Floor32(const u32 value) { | ||
| 127 | return MostSignificantBit32(value); | ||
| 128 | } | ||
| 129 | |||
| 130 | inline u32 Log2Ceil32(const u32 value) { | ||
| 131 | const u32 log2_f = Log2Floor32(value); | ||
| 132 | return log2_f + ((value ^ (1U << log2_f)) != 0U); | ||
| 133 | } | ||
| 134 | |||
| 135 | inline u32 Log2Floor64(const u64 value) { | ||
| 136 | return MostSignificantBit64(value); | ||
| 137 | } | ||
| 138 | |||
| 139 | inline u32 Log2Ceil64(const u64 value) { | ||
| 140 | const u64 log2_f = static_cast<u64>(Log2Floor64(value)); | ||
| 141 | return static_cast<u32>(log2_f + ((value ^ (1ULL << log2_f)) != 0ULL)); | ||
| 142 | } | ||
| 143 | |||
| 100 | } // namespace Common | 144 | } // namespace Common |
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 8b0d34da6..04ecac959 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <algorithm> | ||
| 7 | #include <string> | 8 | #include <string> |
| 8 | 9 | ||
| 9 | #if !defined(ARCHITECTURE_x86_64) | 10 | #if !defined(ARCHITECTURE_x86_64) |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f8b67cbe1..6839abe71 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -41,12 +41,12 @@ add_library(video_core STATIC | |||
| 41 | renderer_opengl/gl_buffer_cache.h | 41 | renderer_opengl/gl_buffer_cache.h |
| 42 | renderer_opengl/gl_device.cpp | 42 | renderer_opengl/gl_device.cpp |
| 43 | renderer_opengl/gl_device.h | 43 | renderer_opengl/gl_device.h |
| 44 | renderer_opengl/gl_framebuffer_cache.cpp | ||
| 45 | renderer_opengl/gl_framebuffer_cache.h | ||
| 44 | renderer_opengl/gl_global_cache.cpp | 46 | renderer_opengl/gl_global_cache.cpp |
| 45 | renderer_opengl/gl_global_cache.h | 47 | renderer_opengl/gl_global_cache.h |
| 46 | renderer_opengl/gl_rasterizer.cpp | 48 | renderer_opengl/gl_rasterizer.cpp |
| 47 | renderer_opengl/gl_rasterizer.h | 49 | renderer_opengl/gl_rasterizer.h |
| 48 | renderer_opengl/gl_rasterizer_cache.cpp | ||
| 49 | renderer_opengl/gl_rasterizer_cache.h | ||
| 50 | renderer_opengl/gl_resource_manager.cpp | 50 | renderer_opengl/gl_resource_manager.cpp |
| 51 | renderer_opengl/gl_resource_manager.h | 51 | renderer_opengl/gl_resource_manager.h |
| 52 | renderer_opengl/gl_sampler_cache.cpp | 52 | renderer_opengl/gl_sampler_cache.cpp |
| @@ -67,6 +67,8 @@ add_library(video_core STATIC | |||
| 67 | renderer_opengl/gl_state.h | 67 | renderer_opengl/gl_state.h |
| 68 | renderer_opengl/gl_stream_buffer.cpp | 68 | renderer_opengl/gl_stream_buffer.cpp |
| 69 | renderer_opengl/gl_stream_buffer.h | 69 | renderer_opengl/gl_stream_buffer.h |
| 70 | renderer_opengl/gl_texture_cache.cpp | ||
| 71 | renderer_opengl/gl_texture_cache.h | ||
| 70 | renderer_opengl/maxwell_to_gl.h | 72 | renderer_opengl/maxwell_to_gl.h |
| 71 | renderer_opengl/renderer_opengl.cpp | 73 | renderer_opengl/renderer_opengl.cpp |
| 72 | renderer_opengl/renderer_opengl.h | 74 | renderer_opengl/renderer_opengl.h |
| @@ -88,6 +90,7 @@ add_library(video_core STATIC | |||
| 88 | shader/decode/conversion.cpp | 90 | shader/decode/conversion.cpp |
| 89 | shader/decode/memory.cpp | 91 | shader/decode/memory.cpp |
| 90 | shader/decode/texture.cpp | 92 | shader/decode/texture.cpp |
| 93 | shader/decode/image.cpp | ||
| 91 | shader/decode/float_set_predicate.cpp | 94 | shader/decode/float_set_predicate.cpp |
| 92 | shader/decode/integer_set_predicate.cpp | 95 | shader/decode/integer_set_predicate.cpp |
| 93 | shader/decode/half_set_predicate.cpp | 96 | shader/decode/half_set_predicate.cpp |
| @@ -109,6 +112,13 @@ add_library(video_core STATIC | |||
| 109 | shader/track.cpp | 112 | shader/track.cpp |
| 110 | surface.cpp | 113 | surface.cpp |
| 111 | surface.h | 114 | surface.h |
| 115 | texture_cache/surface_base.cpp | ||
| 116 | texture_cache/surface_base.h | ||
| 117 | texture_cache/surface_params.cpp | ||
| 118 | texture_cache/surface_params.h | ||
| 119 | texture_cache/surface_view.cpp | ||
| 120 | texture_cache/surface_view.h | ||
| 121 | texture_cache/texture_cache.h | ||
| 112 | textures/astc.cpp | 122 | textures/astc.cpp |
| 113 | textures/astc.h | 123 | textures/astc.h |
| 114 | textures/convert.cpp | 124 | textures/convert.cpp |
| @@ -116,8 +126,6 @@ add_library(video_core STATIC | |||
| 116 | textures/decoders.cpp | 126 | textures/decoders.cpp |
| 117 | textures/decoders.h | 127 | textures/decoders.h |
| 118 | textures/texture.h | 128 | textures/texture.h |
| 119 | texture_cache.cpp | ||
| 120 | texture_cache.h | ||
| 121 | video_core.cpp | 129 | video_core.cpp |
| 122 | video_core.h | 130 | video_core.h |
| 123 | ) | 131 | ) |
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index 082a40cd9..d44ad0cd8 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp | |||
| @@ -36,10 +36,10 @@ void State::ProcessData(const u32 data, const bool is_last_call) { | |||
| 36 | } else { | 36 | } else { |
| 37 | UNIMPLEMENTED_IF(regs.dest.z != 0); | 37 | UNIMPLEMENTED_IF(regs.dest.z != 0); |
| 38 | UNIMPLEMENTED_IF(regs.dest.depth != 1); | 38 | UNIMPLEMENTED_IF(regs.dest.depth != 1); |
| 39 | UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); | 39 | UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 0); |
| 40 | UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); | 40 | UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 0); |
| 41 | const std::size_t dst_size = Tegra::Texture::CalculateSize( | 41 | const std::size_t dst_size = Tegra::Texture::CalculateSize( |
| 42 | true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); | 42 | true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 0); |
| 43 | tmp_buffer.resize(dst_size); | 43 | tmp_buffer.resize(dst_size); |
| 44 | memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); | 44 | memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); |
| 45 | Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y, | 45 | Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, regs.dest.y, |
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index ef4f5839a..462da419e 100644 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h | |||
| @@ -39,15 +39,15 @@ struct Registers { | |||
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | u32 BlockWidth() const { | 41 | u32 BlockWidth() const { |
| 42 | return 1U << block_width.Value(); | 42 | return block_width.Value(); |
| 43 | } | 43 | } |
| 44 | 44 | ||
| 45 | u32 BlockHeight() const { | 45 | u32 BlockHeight() const { |
| 46 | return 1U << block_height.Value(); | 46 | return block_height.Value(); |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | u32 BlockDepth() const { | 49 | u32 BlockDepth() const { |
| 50 | return 1U << block_depth.Value(); | 50 | return block_depth.Value(); |
| 51 | } | 51 | } |
| 52 | } dest; | 52 | } dest; |
| 53 | }; | 53 | }; |
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 55966eef1..0ee228e28 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp | |||
| @@ -4,7 +4,6 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "common/math_util.h" | ||
| 8 | #include "video_core/engines/fermi_2d.h" | 7 | #include "video_core/engines/fermi_2d.h" |
| 9 | #include "video_core/memory_manager.h" | 8 | #include "video_core/memory_manager.h" |
| 10 | #include "video_core/rasterizer_interface.h" | 9 | #include "video_core/rasterizer_interface.h" |
| @@ -35,21 +34,31 @@ void Fermi2D::HandleSurfaceCopy() { | |||
| 35 | static_cast<u32>(regs.operation)); | 34 | static_cast<u32>(regs.operation)); |
| 36 | 35 | ||
| 37 | // TODO(Subv): Only raw copies are implemented. | 36 | // TODO(Subv): Only raw copies are implemented. |
| 38 | ASSERT(regs.operation == Regs::Operation::SrcCopy); | 37 | ASSERT(regs.operation == Operation::SrcCopy); |
| 39 | 38 | ||
| 40 | const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)}; | 39 | const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)}; |
| 41 | const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)}; | 40 | const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)}; |
| 42 | const u32 src_blit_x2{ | 41 | u32 src_blit_x2, src_blit_y2; |
| 43 | static_cast<u32>((regs.blit_src_x + (regs.blit_dst_width * regs.blit_du_dx)) >> 32)}; | 42 | if (regs.blit_control.origin == Origin::Corner) { |
| 44 | const u32 src_blit_y2{ | 43 | src_blit_x2 = |
| 45 | static_cast<u32>((regs.blit_src_y + (regs.blit_dst_height * regs.blit_dv_dy)) >> 32)}; | 44 | static_cast<u32>((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32); |
| 46 | 45 | src_blit_y2 = | |
| 46 | static_cast<u32>((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32); | ||
| 47 | } else { | ||
| 48 | src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width); | ||
| 49 | src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height); | ||
| 50 | } | ||
| 47 | const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; | 51 | const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; |
| 48 | const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, | 52 | const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, |
| 49 | regs.blit_dst_x + regs.blit_dst_width, | 53 | regs.blit_dst_x + regs.blit_dst_width, |
| 50 | regs.blit_dst_y + regs.blit_dst_height}; | 54 | regs.blit_dst_y + regs.blit_dst_height}; |
| 55 | Config copy_config; | ||
| 56 | copy_config.operation = regs.operation; | ||
| 57 | copy_config.filter = regs.blit_control.filter; | ||
| 58 | copy_config.src_rect = src_rect; | ||
| 59 | copy_config.dst_rect = dst_rect; | ||
| 51 | 60 | ||
| 52 | if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, src_rect, dst_rect)) { | 61 | if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) { |
| 53 | UNIMPLEMENTED(); | 62 | UNIMPLEMENTED(); |
| 54 | } | 63 | } |
| 55 | } | 64 | } |
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 45f59a4d9..05421d185 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "common/bit_field.h" | 9 | #include "common/bit_field.h" |
| 10 | #include "common/common_funcs.h" | 10 | #include "common/common_funcs.h" |
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "common/math_util.h" | ||
| 12 | #include "video_core/gpu.h" | 13 | #include "video_core/gpu.h" |
| 13 | 14 | ||
| 14 | namespace Tegra { | 15 | namespace Tegra { |
| @@ -38,6 +39,26 @@ public: | |||
| 38 | /// Write the value to the register identified by method. | 39 | /// Write the value to the register identified by method. |
| 39 | void CallMethod(const GPU::MethodCall& method_call); | 40 | void CallMethod(const GPU::MethodCall& method_call); |
| 40 | 41 | ||
| 42 | enum class Origin : u32 { | ||
| 43 | Center = 0, | ||
| 44 | Corner = 1, | ||
| 45 | }; | ||
| 46 | |||
| 47 | enum class Filter : u32 { | ||
| 48 | PointSample = 0, // Nearest | ||
| 49 | Linear = 1, | ||
| 50 | }; | ||
| 51 | |||
| 52 | enum class Operation : u32 { | ||
| 53 | SrcCopyAnd = 0, | ||
| 54 | ROPAnd = 1, | ||
| 55 | Blend = 2, | ||
| 56 | SrcCopy = 3, | ||
| 57 | ROP = 4, | ||
| 58 | SrcCopyPremult = 5, | ||
| 59 | BlendPremult = 6, | ||
| 60 | }; | ||
| 61 | |||
| 41 | struct Regs { | 62 | struct Regs { |
| 42 | static constexpr std::size_t NUM_REGS = 0x258; | 63 | static constexpr std::size_t NUM_REGS = 0x258; |
| 43 | 64 | ||
| @@ -63,32 +84,19 @@ public: | |||
| 63 | } | 84 | } |
| 64 | 85 | ||
| 65 | u32 BlockWidth() const { | 86 | u32 BlockWidth() const { |
| 66 | // The block width is stored in log2 format. | 87 | return block_width.Value(); |
| 67 | return 1 << block_width; | ||
| 68 | } | 88 | } |
| 69 | 89 | ||
| 70 | u32 BlockHeight() const { | 90 | u32 BlockHeight() const { |
| 71 | // The block height is stored in log2 format. | 91 | return block_height.Value(); |
| 72 | return 1 << block_height; | ||
| 73 | } | 92 | } |
| 74 | 93 | ||
| 75 | u32 BlockDepth() const { | 94 | u32 BlockDepth() const { |
| 76 | // The block depth is stored in log2 format. | 95 | return block_depth.Value(); |
| 77 | return 1 << block_depth; | ||
| 78 | } | 96 | } |
| 79 | }; | 97 | }; |
| 80 | static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); | 98 | static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); |
| 81 | 99 | ||
| 82 | enum class Operation : u32 { | ||
| 83 | SrcCopyAnd = 0, | ||
| 84 | ROPAnd = 1, | ||
| 85 | Blend = 2, | ||
| 86 | SrcCopy = 3, | ||
| 87 | ROP = 4, | ||
| 88 | SrcCopyPremult = 5, | ||
| 89 | BlendPremult = 6, | ||
| 90 | }; | ||
| 91 | |||
| 92 | union { | 100 | union { |
| 93 | struct { | 101 | struct { |
| 94 | INSERT_PADDING_WORDS(0x80); | 102 | INSERT_PADDING_WORDS(0x80); |
| @@ -105,7 +113,11 @@ public: | |||
| 105 | 113 | ||
| 106 | INSERT_PADDING_WORDS(0x177); | 114 | INSERT_PADDING_WORDS(0x177); |
| 107 | 115 | ||
| 108 | u32 blit_control; | 116 | union { |
| 117 | u32 raw; | ||
| 118 | BitField<0, 1, Origin> origin; | ||
| 119 | BitField<4, 1, Filter> filter; | ||
| 120 | } blit_control; | ||
| 109 | 121 | ||
| 110 | INSERT_PADDING_WORDS(0x8); | 122 | INSERT_PADDING_WORDS(0x8); |
| 111 | 123 | ||
| @@ -124,6 +136,13 @@ public: | |||
| 124 | }; | 136 | }; |
| 125 | } regs{}; | 137 | } regs{}; |
| 126 | 138 | ||
| 139 | struct Config { | ||
| 140 | Operation operation; | ||
| 141 | Filter filter; | ||
| 142 | Common::Rectangle<u32> src_rect; | ||
| 143 | Common::Rectangle<u32> dst_rect; | ||
| 144 | }; | ||
| 145 | |||
| 127 | private: | 146 | private: |
| 128 | VideoCore::RasterizerInterface& rasterizer; | 147 | VideoCore::RasterizerInterface& rasterizer; |
| 129 | MemoryManager& memory_manager; | 148 | MemoryManager& memory_manager; |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 08d553696..8755b8af4 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -430,14 +430,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | |||
| 430 | Texture::TICEntry tic_entry; | 430 | Texture::TICEntry tic_entry; |
| 431 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); | 431 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); |
| 432 | 432 | ||
| 433 | ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || | 433 | const auto r_type{tic_entry.r_type.Value()}; |
| 434 | tic_entry.header_version == Texture::TICHeaderVersion::Pitch, | 434 | const auto g_type{tic_entry.g_type.Value()}; |
| 435 | "TIC versions other than BlockLinear or Pitch are unimplemented"); | 435 | const auto b_type{tic_entry.b_type.Value()}; |
| 436 | 436 | const auto a_type{tic_entry.a_type.Value()}; | |
| 437 | const auto r_type = tic_entry.r_type.Value(); | ||
| 438 | const auto g_type = tic_entry.g_type.Value(); | ||
| 439 | const auto b_type = tic_entry.b_type.Value(); | ||
| 440 | const auto a_type = tic_entry.a_type.Value(); | ||
| 441 | 437 | ||
| 442 | // TODO(Subv): Different data types for separate components are not supported | 438 | // TODO(Subv): Different data types for separate components are not supported |
| 443 | DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); | 439 | DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 3a5dfef0c..afb9578d0 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -111,7 +111,7 @@ void MaxwellDMA::HandleCopy() { | |||
| 111 | 111 | ||
| 112 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); | 112 | memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); |
| 113 | } else { | 113 | } else { |
| 114 | ASSERT(regs.dst_params.BlockDepth() == 1); | 114 | ASSERT(regs.dst_params.BlockDepth() == 0); |
| 115 | 115 | ||
| 116 | const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; | 116 | const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; |
| 117 | 117 | ||
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index e5942f671..17b015ca7 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -59,11 +59,11 @@ public: | |||
| 59 | }; | 59 | }; |
| 60 | 60 | ||
| 61 | u32 BlockHeight() const { | 61 | u32 BlockHeight() const { |
| 62 | return 1 << block_height; | 62 | return block_height.Value(); |
| 63 | } | 63 | } |
| 64 | 64 | ||
| 65 | u32 BlockDepth() const { | 65 | u32 BlockDepth() const { |
| 66 | return 1 << block_depth; | 66 | return block_depth.Value(); |
| 67 | } | 67 | } |
| 68 | }; | 68 | }; |
| 69 | 69 | ||
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index ffb3ec3e0..404d4f5aa 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 7 | #include <bitset> | 8 | #include <bitset> |
| 8 | #include <optional> | 9 | #include <optional> |
| 9 | #include <tuple> | 10 | #include <tuple> |
| @@ -126,6 +127,15 @@ union Sampler { | |||
| 126 | u64 value{}; | 127 | u64 value{}; |
| 127 | }; | 128 | }; |
| 128 | 129 | ||
| 130 | union Image { | ||
| 131 | Image() = default; | ||
| 132 | |||
| 133 | constexpr explicit Image(u64 value) : value{value} {} | ||
| 134 | |||
| 135 | BitField<36, 13, u64> index; | ||
| 136 | u64 value; | ||
| 137 | }; | ||
| 138 | |||
| 129 | } // namespace Tegra::Shader | 139 | } // namespace Tegra::Shader |
| 130 | 140 | ||
| 131 | namespace std { | 141 | namespace std { |
| @@ -344,6 +354,26 @@ enum class TextureMiscMode : u64 { | |||
| 344 | PTP, | 354 | PTP, |
| 345 | }; | 355 | }; |
| 346 | 356 | ||
| 357 | enum class SurfaceDataMode : u64 { | ||
| 358 | P = 0, | ||
| 359 | D_BA = 1, | ||
| 360 | }; | ||
| 361 | |||
| 362 | enum class OutOfBoundsStore : u64 { | ||
| 363 | Ignore = 0, | ||
| 364 | Clamp = 1, | ||
| 365 | Trap = 2, | ||
| 366 | }; | ||
| 367 | |||
| 368 | enum class ImageType : u64 { | ||
| 369 | Texture1D = 0, | ||
| 370 | TextureBuffer = 1, | ||
| 371 | Texture1DArray = 2, | ||
| 372 | Texture2D = 3, | ||
| 373 | Texture2DArray = 4, | ||
| 374 | Texture3D = 5, | ||
| 375 | }; | ||
| 376 | |||
| 347 | enum class IsberdMode : u64 { | 377 | enum class IsberdMode : u64 { |
| 348 | None = 0, | 378 | None = 0, |
| 349 | Patch = 1, | 379 | Patch = 1, |
| @@ -398,7 +428,7 @@ enum class LmemLoadCacheManagement : u64 { | |||
| 398 | CV = 3, | 428 | CV = 3, |
| 399 | }; | 429 | }; |
| 400 | 430 | ||
| 401 | enum class LmemStoreCacheManagement : u64 { | 431 | enum class StoreCacheManagement : u64 { |
| 402 | Default = 0, | 432 | Default = 0, |
| 403 | CG = 1, | 433 | CG = 1, |
| 404 | CS = 2, | 434 | CS = 2, |
| @@ -811,7 +841,7 @@ union Instruction { | |||
| 811 | } ld_l; | 841 | } ld_l; |
| 812 | 842 | ||
| 813 | union { | 843 | union { |
| 814 | BitField<44, 2, LmemStoreCacheManagement> cache_management; | 844 | BitField<44, 2, StoreCacheManagement> cache_management; |
| 815 | } st_l; | 845 | } st_l; |
| 816 | 846 | ||
| 817 | union { | 847 | union { |
| @@ -1232,6 +1262,20 @@ union Instruction { | |||
| 1232 | } texs; | 1262 | } texs; |
| 1233 | 1263 | ||
| 1234 | union { | 1264 | union { |
| 1265 | BitField<28, 1, u64> is_array; | ||
| 1266 | BitField<29, 2, TextureType> texture_type; | ||
| 1267 | BitField<35, 1, u64> aoffi; | ||
| 1268 | BitField<49, 1, u64> nodep_flag; | ||
| 1269 | BitField<50, 1, u64> ms; // Multisample? | ||
| 1270 | BitField<54, 1, u64> cl; | ||
| 1271 | BitField<55, 1, u64> process_mode; | ||
| 1272 | |||
| 1273 | TextureProcessMode GetTextureProcessMode() const { | ||
| 1274 | return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL; | ||
| 1275 | } | ||
| 1276 | } tld; | ||
| 1277 | |||
| 1278 | union { | ||
| 1235 | BitField<49, 1, u64> nodep_flag; | 1279 | BitField<49, 1, u64> nodep_flag; |
| 1236 | BitField<53, 4, u64> texture_info; | 1280 | BitField<53, 4, u64> texture_info; |
| 1237 | 1281 | ||
| @@ -1281,6 +1325,35 @@ union Instruction { | |||
| 1281 | } tlds; | 1325 | } tlds; |
| 1282 | 1326 | ||
| 1283 | union { | 1327 | union { |
| 1328 | BitField<24, 2, StoreCacheManagement> cache_management; | ||
| 1329 | BitField<33, 3, ImageType> image_type; | ||
| 1330 | BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; | ||
| 1331 | BitField<51, 1, u64> is_immediate; | ||
| 1332 | BitField<52, 1, SurfaceDataMode> mode; | ||
| 1333 | |||
| 1334 | BitField<20, 3, StoreType> store_data_layout; | ||
| 1335 | BitField<20, 4, u64> component_mask_selector; | ||
| 1336 | |||
| 1337 | bool IsComponentEnabled(std::size_t component) const { | ||
| 1338 | ASSERT(mode == SurfaceDataMode::P); | ||
| 1339 | constexpr u8 R = 0b0001; | ||
| 1340 | constexpr u8 G = 0b0010; | ||
| 1341 | constexpr u8 B = 0b0100; | ||
| 1342 | constexpr u8 A = 0b1000; | ||
| 1343 | constexpr std::array<u8, 16> mask = { | ||
| 1344 | 0, (R), (G), (R | G), (B), (R | B), | ||
| 1345 | (G | B), (R | G | B), (A), (R | A), (G | A), (R | G | A), | ||
| 1346 | (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; | ||
| 1347 | return std::bitset<4>{mask.at(component_mask_selector)}.test(component); | ||
| 1348 | } | ||
| 1349 | |||
| 1350 | StoreType GetStoreDataLayout() const { | ||
| 1351 | ASSERT(mode == SurfaceDataMode::D_BA); | ||
| 1352 | return store_data_layout; | ||
| 1353 | } | ||
| 1354 | } sust; | ||
| 1355 | |||
| 1356 | union { | ||
| 1284 | BitField<20, 24, u64> target; | 1357 | BitField<20, 24, u64> target; |
| 1285 | BitField<5, 1, u64> constant_buffer; | 1358 | BitField<5, 1, u64> constant_buffer; |
| 1286 | 1359 | ||
| @@ -1371,6 +1444,7 @@ union Instruction { | |||
| 1371 | 1444 | ||
| 1372 | Attribute attribute; | 1445 | Attribute attribute; |
| 1373 | Sampler sampler; | 1446 | Sampler sampler; |
| 1447 | Image image; | ||
| 1374 | 1448 | ||
| 1375 | u64 value; | 1449 | u64 value; |
| 1376 | }; | 1450 | }; |
| @@ -1408,11 +1482,13 @@ public: | |||
| 1408 | TXQ, // Texture Query | 1482 | TXQ, // Texture Query |
| 1409 | TXQ_B, // Texture Query Bindless | 1483 | TXQ_B, // Texture Query Bindless |
| 1410 | TEXS, // Texture Fetch with scalar/non-vec4 source/destinations | 1484 | TEXS, // Texture Fetch with scalar/non-vec4 source/destinations |
| 1485 | TLD, // Texture Load | ||
| 1411 | TLDS, // Texture Load with scalar/non-vec4 source/destinations | 1486 | TLDS, // Texture Load with scalar/non-vec4 source/destinations |
| 1412 | TLD4, // Texture Load 4 | 1487 | TLD4, // Texture Load 4 |
| 1413 | TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations | 1488 | TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations |
| 1414 | TMML_B, // Texture Mip Map Level | 1489 | TMML_B, // Texture Mip Map Level |
| 1415 | TMML, // Texture Mip Map Level | 1490 | TMML, // Texture Mip Map Level |
| 1491 | SUST, // Surface Store | ||
| 1416 | EXIT, | 1492 | EXIT, |
| 1417 | IPA, | 1493 | IPA, |
| 1418 | OUT_R, // Emit vertex/primitive | 1494 | OUT_R, // Emit vertex/primitive |
| @@ -1543,6 +1619,7 @@ public: | |||
| 1543 | Synch, | 1619 | Synch, |
| 1544 | Memory, | 1620 | Memory, |
| 1545 | Texture, | 1621 | Texture, |
| 1622 | Image, | ||
| 1546 | FloatSet, | 1623 | FloatSet, |
| 1547 | FloatSetPredicate, | 1624 | FloatSetPredicate, |
| 1548 | IntegerSet, | 1625 | IntegerSet, |
| @@ -1682,11 +1759,13 @@ private: | |||
| 1682 | INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), | 1759 | INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), |
| 1683 | INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), | 1760 | INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), |
| 1684 | INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), | 1761 | INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), |
| 1762 | INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), | ||
| 1685 | INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), | 1763 | INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), |
| 1686 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), | 1764 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), |
| 1687 | INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), | 1765 | INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), |
| 1688 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), | 1766 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), |
| 1689 | INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), | 1767 | INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), |
| 1768 | INST("11101011001-----", Id::SUST, Type::Image, "SUST"), | ||
| 1690 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), | 1769 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), |
| 1691 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), | 1770 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), |
| 1692 | INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), | 1771 | INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), |
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 5d8d126c1..322453116 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -202,11 +202,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const { | |||
| 202 | } | 202 | } |
| 203 | 203 | ||
| 204 | bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const { | 204 | bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t size) const { |
| 205 | const GPUVAddr end = start + size; | 205 | const std::size_t inner_size = size - 1; |
| 206 | const GPUVAddr end = start + inner_size; | ||
| 206 | const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start)); | 207 | const auto host_ptr_start = reinterpret_cast<std::uintptr_t>(GetPointer(start)); |
| 207 | const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end)); | 208 | const auto host_ptr_end = reinterpret_cast<std::uintptr_t>(GetPointer(end)); |
| 208 | const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start); | 209 | const auto range = static_cast<std::size_t>(host_ptr_end - host_ptr_start); |
| 209 | return range == size; | 210 | return range == inner_size; |
| 210 | } | 211 | } |
| 211 | 212 | ||
| 212 | void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const { | 213 | void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const { |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index d7b86df38..5ee4f8e8e 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -10,6 +10,10 @@ | |||
| 10 | #include "video_core/engines/fermi_2d.h" | 10 | #include "video_core/engines/fermi_2d.h" |
| 11 | #include "video_core/gpu.h" | 11 | #include "video_core/gpu.h" |
| 12 | 12 | ||
| 13 | namespace Tegra { | ||
| 14 | class MemoryManager; | ||
| 15 | } | ||
| 16 | |||
| 13 | namespace VideoCore { | 17 | namespace VideoCore { |
| 14 | 18 | ||
| 15 | enum class LoadCallbackStage { | 19 | enum class LoadCallbackStage { |
| @@ -46,8 +50,7 @@ public: | |||
| 46 | /// Attempt to use a faster method to perform a surface copy | 50 | /// Attempt to use a faster method to perform a surface copy |
| 47 | virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 51 | virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 48 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 52 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
| 49 | const Common::Rectangle<u32>& src_rect, | 53 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 50 | const Common::Rectangle<u32>& dst_rect) { | ||
| 51 | return false; | 54 | return false; |
| 52 | } | 55 | } |
| 53 | 56 | ||
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp new file mode 100644 index 000000000..7c926bd48 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp | |||
| @@ -0,0 +1,75 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <tuple> | ||
| 6 | |||
| 7 | #include "common/cityhash.h" | ||
| 8 | #include "common/scope_exit.h" | ||
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_state.h" | ||
| 12 | |||
| 13 | namespace OpenGL { | ||
| 14 | |||
| 15 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 16 | |||
| 17 | FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default; | ||
| 18 | |||
| 19 | FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default; | ||
| 20 | |||
| 21 | GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) { | ||
| 22 | const auto [entry, is_cache_miss] = cache.try_emplace(key); | ||
| 23 | auto& framebuffer{entry->second}; | ||
| 24 | if (is_cache_miss) { | ||
| 25 | framebuffer = CreateFramebuffer(key); | ||
| 26 | } | ||
| 27 | return framebuffer.handle; | ||
| 28 | } | ||
| 29 | |||
| 30 | OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) { | ||
| 31 | OGLFramebuffer framebuffer; | ||
| 32 | framebuffer.Create(); | ||
| 33 | |||
| 34 | // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs. | ||
| 35 | local_state.draw.draw_framebuffer = framebuffer.handle; | ||
| 36 | local_state.ApplyFramebufferState(); | ||
| 37 | |||
| 38 | if (key.is_single_buffer) { | ||
| 39 | if (key.color_attachments[0] != GL_NONE && key.colors[0]) { | ||
| 40 | key.colors[0]->Attach(key.color_attachments[0], GL_DRAW_FRAMEBUFFER); | ||
| 41 | glDrawBuffer(key.color_attachments[0]); | ||
| 42 | } else { | ||
| 43 | glDrawBuffer(GL_NONE); | ||
| 44 | } | ||
| 45 | } else { | ||
| 46 | for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { | ||
| 47 | if (key.colors[index]) { | ||
| 48 | key.colors[index]->Attach(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), | ||
| 49 | GL_DRAW_FRAMEBUFFER); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | glDrawBuffers(key.colors_count, key.color_attachments.data()); | ||
| 53 | } | ||
| 54 | |||
| 55 | if (key.zeta) { | ||
| 56 | key.zeta->Attach(key.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT, | ||
| 57 | GL_DRAW_FRAMEBUFFER); | ||
| 58 | } | ||
| 59 | |||
| 60 | return framebuffer; | ||
| 61 | } | ||
| 62 | |||
| 63 | std::size_t FramebufferCacheKey::Hash() const { | ||
| 64 | static_assert(sizeof(*this) % sizeof(u64) == 0, "Unaligned struct"); | ||
| 65 | return static_cast<std::size_t>( | ||
| 66 | Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this))); | ||
| 67 | } | ||
| 68 | |||
| 69 | bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const { | ||
| 70 | return std::tie(is_single_buffer, stencil_enable, colors_count, color_attachments, colors, | ||
| 71 | zeta) == std::tie(rhs.is_single_buffer, rhs.stencil_enable, rhs.colors_count, | ||
| 72 | rhs.color_attachments, rhs.colors, rhs.zeta); | ||
| 73 | } | ||
| 74 | |||
| 75 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h new file mode 100644 index 000000000..a3a996353 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.h | |||
| @@ -0,0 +1,68 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <unordered_map> | ||
| 10 | |||
| 11 | #include <glad/glad.h> | ||
| 12 | |||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 16 | #include "video_core/renderer_opengl/gl_state.h" | ||
| 17 | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||
| 18 | |||
| 19 | namespace OpenGL { | ||
| 20 | |||
| 21 | struct alignas(sizeof(u64)) FramebufferCacheKey { | ||
| 22 | bool is_single_buffer = false; | ||
| 23 | bool stencil_enable = false; | ||
| 24 | u16 colors_count = 0; | ||
| 25 | |||
| 26 | std::array<GLenum, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_attachments{}; | ||
| 27 | std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors; | ||
| 28 | View zeta; | ||
| 29 | |||
| 30 | std::size_t Hash() const; | ||
| 31 | |||
| 32 | bool operator==(const FramebufferCacheKey& rhs) const; | ||
| 33 | |||
| 34 | bool operator!=(const FramebufferCacheKey& rhs) const { | ||
| 35 | return !operator==(rhs); | ||
| 36 | } | ||
| 37 | }; | ||
| 38 | |||
| 39 | } // namespace OpenGL | ||
| 40 | |||
| 41 | namespace std { | ||
| 42 | |||
| 43 | template <> | ||
| 44 | struct hash<OpenGL::FramebufferCacheKey> { | ||
| 45 | std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept { | ||
| 46 | return k.Hash(); | ||
| 47 | } | ||
| 48 | }; | ||
| 49 | |||
| 50 | } // namespace std | ||
| 51 | |||
| 52 | namespace OpenGL { | ||
| 53 | |||
| 54 | class FramebufferCacheOpenGL { | ||
| 55 | public: | ||
| 56 | FramebufferCacheOpenGL(); | ||
| 57 | ~FramebufferCacheOpenGL(); | ||
| 58 | |||
| 59 | GLuint GetFramebuffer(const FramebufferCacheKey& key); | ||
| 60 | |||
| 61 | private: | ||
| 62 | OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key); | ||
| 63 | |||
| 64 | OpenGLState local_state; | ||
| 65 | std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache; | ||
| 66 | }; | ||
| 67 | |||
| 68 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d77426067..f45a3c5ef 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -29,8 +29,10 @@ | |||
| 29 | namespace OpenGL { | 29 | namespace OpenGL { |
| 30 | 30 | ||
| 31 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 31 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 32 | using PixelFormat = VideoCore::Surface::PixelFormat; | 32 | |
| 33 | using SurfaceType = VideoCore::Surface::SurfaceType; | 33 | using VideoCore::Surface::PixelFormat; |
| 34 | using VideoCore::Surface::SurfaceTarget; | ||
| 35 | using VideoCore::Surface::SurfaceType; | ||
| 34 | 36 | ||
| 35 | MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192)); | 37 | MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192)); |
| 36 | MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192)); | 38 | MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192)); |
| @@ -78,29 +80,9 @@ struct DrawParameters { | |||
| 78 | } | 80 | } |
| 79 | }; | 81 | }; |
| 80 | 82 | ||
| 81 | struct FramebufferCacheKey { | ||
| 82 | bool is_single_buffer = false; | ||
| 83 | bool stencil_enable = false; | ||
| 84 | |||
| 85 | std::array<GLenum, Maxwell::NumRenderTargets> color_attachments{}; | ||
| 86 | std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors{}; | ||
| 87 | u32 colors_count = 0; | ||
| 88 | |||
| 89 | GLuint zeta = 0; | ||
| 90 | |||
| 91 | auto Tie() const { | ||
| 92 | return std::tie(is_single_buffer, stencil_enable, color_attachments, colors, colors_count, | ||
| 93 | zeta); | ||
| 94 | } | ||
| 95 | |||
| 96 | bool operator<(const FramebufferCacheKey& rhs) const { | ||
| 97 | return Tie() < rhs.Tie(); | ||
| 98 | } | ||
| 99 | }; | ||
| 100 | |||
| 101 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | 83 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, |
| 102 | ScreenInfo& info) | 84 | ScreenInfo& info) |
| 103 | : res_cache{*this}, shader_cache{*this, system, emu_window, device}, | 85 | : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, |
| 104 | global_cache{*this}, system{system}, screen_info{info}, | 86 | global_cache{*this}, system{system}, screen_info{info}, |
| 105 | buffer_cache(*this, STREAM_BUFFER_SIZE) { | 87 | buffer_cache(*this, STREAM_BUFFER_SIZE) { |
| 106 | OpenGLState::ApplyDefaultState(); | 88 | OpenGLState::ApplyDefaultState(); |
| @@ -121,11 +103,6 @@ void RasterizerOpenGL::CheckExtensions() { | |||
| 121 | Render_OpenGL, | 103 | Render_OpenGL, |
| 122 | "Anisotropic filter is not supported! This can cause graphical issues in some games."); | 104 | "Anisotropic filter is not supported! This can cause graphical issues in some games."); |
| 123 | } | 105 | } |
| 124 | if (!GLAD_GL_ARB_buffer_storage) { | ||
| 125 | LOG_WARNING( | ||
| 126 | Render_OpenGL, | ||
| 127 | "Buffer storage control is not supported! This can cause performance degradation."); | ||
| 128 | } | ||
| 129 | } | 106 | } |
| 130 | 107 | ||
| 131 | GLuint RasterizerOpenGL::SetupVertexFormat() { | 108 | GLuint RasterizerOpenGL::SetupVertexFormat() { |
| @@ -302,8 +279,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 302 | static_cast<GLsizeiptr>(sizeof(ubo))); | 279 | static_cast<GLsizeiptr>(sizeof(ubo))); |
| 303 | 280 | ||
| 304 | Shader shader{shader_cache.GetStageProgram(program)}; | 281 | Shader shader{shader_cache.GetStageProgram(program)}; |
| 305 | const auto [program_handle, next_bindings] = | 282 | |
| 306 | shader->GetProgramHandle(primitive_mode, base_bindings); | 283 | const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)}; |
| 284 | SetupDrawConstBuffers(stage_enum, shader); | ||
| 285 | SetupGlobalRegions(stage_enum, shader); | ||
| 286 | const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)}; | ||
| 287 | |||
| 288 | const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; | ||
| 289 | const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant); | ||
| 307 | 290 | ||
| 308 | switch (program) { | 291 | switch (program) { |
| 309 | case Maxwell::ShaderProgram::VertexA: | 292 | case Maxwell::ShaderProgram::VertexA: |
| @@ -321,11 +304,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 321 | shader_config.enable.Value(), shader_config.offset); | 304 | shader_config.enable.Value(), shader_config.offset); |
| 322 | } | 305 | } |
| 323 | 306 | ||
| 324 | const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); | ||
| 325 | SetupDrawConstBuffers(stage_enum, shader); | ||
| 326 | SetupGlobalRegions(stage_enum, shader); | ||
| 327 | SetupTextures(stage_enum, shader, base_bindings); | ||
| 328 | |||
| 329 | // Workaround for Intel drivers. | 307 | // Workaround for Intel drivers. |
| 330 | // When a clip distance is enabled but not set in the shader it crops parts of the screen | 308 | // When a clip distance is enabled but not set in the shader it crops parts of the screen |
| 331 | // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the | 309 | // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the |
| @@ -351,44 +329,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 351 | gpu.dirty_flags.shaders = false; | 329 | gpu.dirty_flags.shaders = false; |
| 352 | } | 330 | } |
| 353 | 331 | ||
| 354 | void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, | ||
| 355 | OpenGLState& current_state) { | ||
| 356 | const auto [entry, is_cache_miss] = framebuffer_cache.try_emplace(fbkey); | ||
| 357 | auto& framebuffer = entry->second; | ||
| 358 | |||
| 359 | if (is_cache_miss) | ||
| 360 | framebuffer.Create(); | ||
| 361 | |||
| 362 | current_state.draw.draw_framebuffer = framebuffer.handle; | ||
| 363 | current_state.ApplyFramebufferState(); | ||
| 364 | |||
| 365 | if (!is_cache_miss) | ||
| 366 | return; | ||
| 367 | |||
| 368 | if (fbkey.is_single_buffer) { | ||
| 369 | if (fbkey.color_attachments[0] != GL_NONE) { | ||
| 370 | glFramebufferTexture(GL_DRAW_FRAMEBUFFER, fbkey.color_attachments[0], fbkey.colors[0], | ||
| 371 | 0); | ||
| 372 | } | ||
| 373 | glDrawBuffer(fbkey.color_attachments[0]); | ||
| 374 | } else { | ||
| 375 | for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { | ||
| 376 | if (fbkey.colors[index]) { | ||
| 377 | glFramebufferTexture(GL_DRAW_FRAMEBUFFER, | ||
| 378 | GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), | ||
| 379 | fbkey.colors[index], 0); | ||
| 380 | } | ||
| 381 | } | ||
| 382 | glDrawBuffers(fbkey.colors_count, fbkey.color_attachments.data()); | ||
| 383 | } | ||
| 384 | |||
| 385 | if (fbkey.zeta) { | ||
| 386 | GLenum zeta_attachment = | ||
| 387 | fbkey.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT; | ||
| 388 | glFramebufferTexture(GL_DRAW_FRAMEBUFFER, zeta_attachment, fbkey.zeta, 0); | ||
| 389 | } | ||
| 390 | } | ||
| 391 | |||
| 392 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | 332 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { |
| 393 | const auto& regs = system.GPU().Maxwell3D().regs; | 333 | const auto& regs = system.GPU().Maxwell3D().regs; |
| 394 | 334 | ||
| @@ -478,9 +418,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 478 | } | 418 | } |
| 479 | current_framebuffer_config_state = fb_config_state; | 419 | current_framebuffer_config_state = fb_config_state; |
| 480 | 420 | ||
| 481 | Surface depth_surface; | 421 | texture_cache.GuardRenderTargets(true); |
| 422 | |||
| 423 | View depth_surface{}; | ||
| 482 | if (using_depth_fb) { | 424 | if (using_depth_fb) { |
| 483 | depth_surface = res_cache.GetDepthBufferSurface(preserve_contents); | 425 | depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents); |
| 426 | } else { | ||
| 427 | texture_cache.SetEmptyDepthBuffer(); | ||
| 484 | } | 428 | } |
| 485 | 429 | ||
| 486 | UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); | 430 | UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); |
| @@ -493,13 +437,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 493 | if (using_color_fb) { | 437 | if (using_color_fb) { |
| 494 | if (single_color_target) { | 438 | if (single_color_target) { |
| 495 | // Used when just a single color attachment is enabled, e.g. for clearing a color buffer | 439 | // Used when just a single color attachment is enabled, e.g. for clearing a color buffer |
| 496 | Surface color_surface = | 440 | View color_surface{ |
| 497 | res_cache.GetColorBufferSurface(*single_color_target, preserve_contents); | 441 | texture_cache.GetColorBufferSurface(*single_color_target, preserve_contents)}; |
| 498 | 442 | ||
| 499 | if (color_surface) { | 443 | if (color_surface) { |
| 500 | // Assume that a surface will be written to if it is used as a framebuffer, even if | 444 | // Assume that a surface will be written to if it is used as a framebuffer, even if |
| 501 | // the shader doesn't actually write to it. | 445 | // the shader doesn't actually write to it. |
| 502 | color_surface->MarkAsModified(true, res_cache); | 446 | texture_cache.MarkColorBufferInUse(*single_color_target); |
| 503 | // Workaround for and issue in nvidia drivers | 447 | // Workaround for and issue in nvidia drivers |
| 504 | // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ | 448 | // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ |
| 505 | state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion; | 449 | state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion; |
| @@ -508,16 +452,21 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 508 | fbkey.is_single_buffer = true; | 452 | fbkey.is_single_buffer = true; |
| 509 | fbkey.color_attachments[0] = | 453 | fbkey.color_attachments[0] = |
| 510 | GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target); | 454 | GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target); |
| 511 | fbkey.colors[0] = color_surface != nullptr ? color_surface->Texture().handle : 0; | 455 | fbkey.colors[0] = color_surface; |
| 456 | for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { | ||
| 457 | if (index != *single_color_target) { | ||
| 458 | texture_cache.SetEmptyColorBuffer(index); | ||
| 459 | } | ||
| 460 | } | ||
| 512 | } else { | 461 | } else { |
| 513 | // Multiple color attachments are enabled | 462 | // Multiple color attachments are enabled |
| 514 | for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { | 463 | for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { |
| 515 | Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents); | 464 | View color_surface{texture_cache.GetColorBufferSurface(index, preserve_contents)}; |
| 516 | 465 | ||
| 517 | if (color_surface) { | 466 | if (color_surface) { |
| 518 | // Assume that a surface will be written to if it is used as a framebuffer, even | 467 | // Assume that a surface will be written to if it is used as a framebuffer, even |
| 519 | // if the shader doesn't actually write to it. | 468 | // if the shader doesn't actually write to it. |
| 520 | color_surface->MarkAsModified(true, res_cache); | 469 | texture_cache.MarkColorBufferInUse(index); |
| 521 | // Enable sRGB only for supported formats | 470 | // Enable sRGB only for supported formats |
| 522 | // Workaround for and issue in nvidia drivers | 471 | // Workaround for and issue in nvidia drivers |
| 523 | // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ | 472 | // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/ |
| @@ -527,8 +476,7 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 527 | 476 | ||
| 528 | fbkey.color_attachments[index] = | 477 | fbkey.color_attachments[index] = |
| 529 | GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); | 478 | GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index); |
| 530 | fbkey.colors[index] = | 479 | fbkey.colors[index] = color_surface; |
| 531 | color_surface != nullptr ? color_surface->Texture().handle : 0; | ||
| 532 | } | 480 | } |
| 533 | fbkey.is_single_buffer = false; | 481 | fbkey.is_single_buffer = false; |
| 534 | fbkey.colors_count = regs.rt_control.count; | 482 | fbkey.colors_count = regs.rt_control.count; |
| @@ -541,14 +489,16 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( | |||
| 541 | if (depth_surface) { | 489 | if (depth_surface) { |
| 542 | // Assume that a surface will be written to if it is used as a framebuffer, even if | 490 | // Assume that a surface will be written to if it is used as a framebuffer, even if |
| 543 | // the shader doesn't actually write to it. | 491 | // the shader doesn't actually write to it. |
| 544 | depth_surface->MarkAsModified(true, res_cache); | 492 | texture_cache.MarkDepthBufferInUse(); |
| 545 | 493 | ||
| 546 | fbkey.zeta = depth_surface->Texture().handle; | 494 | fbkey.zeta = depth_surface; |
| 547 | fbkey.stencil_enable = regs.stencil_enable && | 495 | fbkey.stencil_enable = regs.stencil_enable && |
| 548 | depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; | 496 | depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; |
| 549 | } | 497 | } |
| 550 | 498 | ||
| 551 | SetupCachedFramebuffer(fbkey, current_state); | 499 | texture_cache.GuardRenderTargets(false); |
| 500 | |||
| 501 | current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(fbkey); | ||
| 552 | SyncViewport(current_state); | 502 | SyncViewport(current_state); |
| 553 | 503 | ||
| 554 | return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; | 504 | return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; |
| @@ -630,6 +580,7 @@ void RasterizerOpenGL::Clear() { | |||
| 630 | clear_state.ApplyDepth(); | 580 | clear_state.ApplyDepth(); |
| 631 | clear_state.ApplyStencilTest(); | 581 | clear_state.ApplyStencilTest(); |
| 632 | clear_state.ApplyViewport(); | 582 | clear_state.ApplyViewport(); |
| 583 | clear_state.ApplyFramebufferState(); | ||
| 633 | 584 | ||
| 634 | if (use_color) { | 585 | if (use_color) { |
| 635 | glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); | 586 | glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); |
| @@ -652,7 +603,6 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 652 | auto& gpu = system.GPU().Maxwell3D(); | 603 | auto& gpu = system.GPU().Maxwell3D(); |
| 653 | const auto& regs = gpu.regs; | 604 | const auto& regs = gpu.regs; |
| 654 | 605 | ||
| 655 | ConfigureFramebuffers(state); | ||
| 656 | SyncColorMask(); | 606 | SyncColorMask(); |
| 657 | SyncFragmentColorClampState(); | 607 | SyncFragmentColorClampState(); |
| 658 | SyncMultiSampleState(); | 608 | SyncMultiSampleState(); |
| @@ -697,16 +647,22 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 697 | SetupVertexBuffer(vao); | 647 | SetupVertexBuffer(vao); |
| 698 | 648 | ||
| 699 | DrawParameters params = SetupDraw(); | 649 | DrawParameters params = SetupDraw(); |
| 650 | texture_cache.GuardSamplers(true); | ||
| 700 | SetupShaders(params.primitive_mode); | 651 | SetupShaders(params.primitive_mode); |
| 652 | texture_cache.GuardSamplers(false); | ||
| 653 | |||
| 654 | ConfigureFramebuffers(state); | ||
| 701 | 655 | ||
| 702 | buffer_cache.Unmap(); | 656 | buffer_cache.Unmap(); |
| 703 | 657 | ||
| 704 | shader_program_manager->ApplyTo(state); | 658 | shader_program_manager->ApplyTo(state); |
| 705 | state.Apply(); | 659 | state.Apply(); |
| 706 | 660 | ||
| 707 | res_cache.SignalPreDrawCall(); | 661 | if (texture_cache.TextureBarrier()) { |
| 662 | glTextureBarrier(); | ||
| 663 | } | ||
| 664 | |||
| 708 | params.DispatchDraw(); | 665 | params.DispatchDraw(); |
| 709 | res_cache.SignalPostDrawCall(); | ||
| 710 | 666 | ||
| 711 | accelerate_draw = AccelDraw::Disabled; | 667 | accelerate_draw = AccelDraw::Disabled; |
| 712 | } | 668 | } |
| @@ -718,7 +674,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { | |||
| 718 | if (!addr || !size) { | 674 | if (!addr || !size) { |
| 719 | return; | 675 | return; |
| 720 | } | 676 | } |
| 721 | res_cache.FlushRegion(addr, size); | 677 | texture_cache.FlushRegion(addr, size); |
| 722 | global_cache.FlushRegion(addr, size); | 678 | global_cache.FlushRegion(addr, size); |
| 723 | } | 679 | } |
| 724 | 680 | ||
| @@ -727,23 +683,24 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | |||
| 727 | if (!addr || !size) { | 683 | if (!addr || !size) { |
| 728 | return; | 684 | return; |
| 729 | } | 685 | } |
| 730 | res_cache.InvalidateRegion(addr, size); | 686 | texture_cache.InvalidateRegion(addr, size); |
| 731 | shader_cache.InvalidateRegion(addr, size); | 687 | shader_cache.InvalidateRegion(addr, size); |
| 732 | global_cache.InvalidateRegion(addr, size); | 688 | global_cache.InvalidateRegion(addr, size); |
| 733 | buffer_cache.InvalidateRegion(addr, size); | 689 | buffer_cache.InvalidateRegion(addr, size); |
| 734 | } | 690 | } |
| 735 | 691 | ||
| 736 | void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | 692 | void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { |
| 737 | FlushRegion(addr, size); | 693 | if (Settings::values.use_accurate_gpu_emulation) { |
| 694 | FlushRegion(addr, size); | ||
| 695 | } | ||
| 738 | InvalidateRegion(addr, size); | 696 | InvalidateRegion(addr, size); |
| 739 | } | 697 | } |
| 740 | 698 | ||
| 741 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 699 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 742 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 700 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
| 743 | const Common::Rectangle<u32>& src_rect, | 701 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 744 | const Common::Rectangle<u32>& dst_rect) { | ||
| 745 | MICROPROFILE_SCOPE(OpenGL_Blits); | 702 | MICROPROFILE_SCOPE(OpenGL_Blits); |
| 746 | res_cache.FermiCopySurface(src, dst, src_rect, dst_rect); | 703 | texture_cache.DoFermiCopy(src, dst, copy_config); |
| 747 | return true; | 704 | return true; |
| 748 | } | 705 | } |
| 749 | 706 | ||
| @@ -755,7 +712,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 755 | 712 | ||
| 756 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 713 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 757 | 714 | ||
| 758 | const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))}; | 715 | const auto surface{ |
| 716 | texture_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))}; | ||
| 759 | if (!surface) { | 717 | if (!surface) { |
| 760 | return {}; | 718 | return {}; |
| 761 | } | 719 | } |
| @@ -771,7 +729,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 771 | LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different"); | 729 | LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different"); |
| 772 | } | 730 | } |
| 773 | 731 | ||
| 774 | screen_info.display_texture = surface->Texture().handle; | 732 | screen_info.display_texture = surface->GetTexture(); |
| 775 | 733 | ||
| 776 | return true; | 734 | return true; |
| 777 | } | 735 | } |
| @@ -837,8 +795,8 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade | |||
| 837 | } | 795 | } |
| 838 | } | 796 | } |
| 839 | 797 | ||
| 840 | void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, | 798 | TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, |
| 841 | BaseBindings base_bindings) { | 799 | BaseBindings base_bindings) { |
| 842 | MICROPROFILE_SCOPE(OpenGL_Texture); | 800 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| 843 | const auto& gpu = system.GPU(); | 801 | const auto& gpu = system.GPU(); |
| 844 | const auto& maxwell3d = gpu.Maxwell3D(); | 802 | const auto& maxwell3d = gpu.Maxwell3D(); |
| @@ -847,6 +805,8 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s | |||
| 847 | ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units), | 805 | ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units), |
| 848 | "Exceeded the number of active textures."); | 806 | "Exceeded the number of active textures."); |
| 849 | 807 | ||
| 808 | TextureBufferUsage texture_buffer_usage{0}; | ||
| 809 | |||
| 850 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | 810 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { |
| 851 | const auto& entry = entries[bindpoint]; | 811 | const auto& entry = entries[bindpoint]; |
| 852 | Tegra::Texture::FullTextureInfo texture; | 812 | Tegra::Texture::FullTextureInfo texture; |
| @@ -860,18 +820,26 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s | |||
| 860 | } | 820 | } |
| 861 | const u32 current_bindpoint = base_bindings.sampler + bindpoint; | 821 | const u32 current_bindpoint = base_bindings.sampler + bindpoint; |
| 862 | 822 | ||
| 863 | state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc); | 823 | auto& unit{state.texture_units[current_bindpoint]}; |
| 824 | unit.sampler = sampler_cache.GetSampler(texture.tsc); | ||
| 864 | 825 | ||
| 865 | if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) { | 826 | if (const auto view{texture_cache.GetTextureSurface(texture, entry)}; view) { |
| 866 | state.texture_units[current_bindpoint].texture = | 827 | if (view->GetSurfaceParams().IsBuffer()) { |
| 867 | surface->Texture(entry.IsArray()).handle; | 828 | // Record that this texture is a texture buffer. |
| 868 | surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, | 829 | texture_buffer_usage.set(bindpoint); |
| 830 | } else { | ||
| 831 | // Apply swizzle to textures that are not buffers. | ||
| 832 | view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, | ||
| 869 | texture.tic.w_source); | 833 | texture.tic.w_source); |
| 834 | } | ||
| 835 | state.texture_units[current_bindpoint].texture = view->GetTexture(); | ||
| 870 | } else { | 836 | } else { |
| 871 | // Can occur when texture addr is null or its memory is unmapped/invalid | 837 | // Can occur when texture addr is null or its memory is unmapped/invalid |
| 872 | state.texture_units[current_bindpoint].texture = 0; | 838 | unit.texture = 0; |
| 873 | } | 839 | } |
| 874 | } | 840 | } |
| 841 | |||
| 842 | return texture_buffer_usage; | ||
| 875 | } | 843 | } |
| 876 | 844 | ||
| 877 | void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { | 845 | void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index f7671ff5d..bf67e3a70 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -23,14 +23,15 @@ | |||
| 23 | #include "video_core/rasterizer_interface.h" | 23 | #include "video_core/rasterizer_interface.h" |
| 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_device.h" | 25 | #include "video_core/renderer_opengl/gl_device.h" |
| 26 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" | ||
| 26 | #include "video_core/renderer_opengl/gl_global_cache.h" | 27 | #include "video_core/renderer_opengl/gl_global_cache.h" |
| 27 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | ||
| 28 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 28 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 29 | #include "video_core/renderer_opengl/gl_sampler_cache.h" | 29 | #include "video_core/renderer_opengl/gl_sampler_cache.h" |
| 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 31 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 31 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 32 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 32 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 33 | #include "video_core/renderer_opengl/gl_state.h" | 33 | #include "video_core/renderer_opengl/gl_state.h" |
| 34 | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||
| 34 | #include "video_core/renderer_opengl/utils.h" | 35 | #include "video_core/renderer_opengl/utils.h" |
| 35 | 36 | ||
| 36 | namespace Core { | 37 | namespace Core { |
| @@ -41,11 +42,14 @@ namespace Core::Frontend { | |||
| 41 | class EmuWindow; | 42 | class EmuWindow; |
| 42 | } | 43 | } |
| 43 | 44 | ||
| 45 | namespace Tegra { | ||
| 46 | class MemoryManager; | ||
| 47 | } | ||
| 48 | |||
| 44 | namespace OpenGL { | 49 | namespace OpenGL { |
| 45 | 50 | ||
| 46 | struct ScreenInfo; | 51 | struct ScreenInfo; |
| 47 | struct DrawParameters; | 52 | struct DrawParameters; |
| 48 | struct FramebufferCacheKey; | ||
| 49 | 53 | ||
| 50 | class RasterizerOpenGL : public VideoCore::RasterizerInterface { | 54 | class RasterizerOpenGL : public VideoCore::RasterizerInterface { |
| 51 | public: | 55 | public: |
| @@ -61,8 +65,7 @@ public: | |||
| 61 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 65 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 62 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 66 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 63 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 67 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
| 64 | const Common::Rectangle<u32>& src_rect, | 68 | const Tegra::Engines::Fermi2D::Config& copy_config) override; |
| 65 | const Common::Rectangle<u32>& dst_rect) override; | ||
| 66 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 69 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 67 | u32 pixel_stride) override; | 70 | u32 pixel_stride) override; |
| 68 | bool AccelerateDrawBatch(bool is_indexed) override; | 71 | bool AccelerateDrawBatch(bool is_indexed) override; |
| @@ -95,6 +98,8 @@ private: | |||
| 95 | 98 | ||
| 96 | /** | 99 | /** |
| 97 | * Configures the color and depth framebuffer states. | 100 | * Configures the color and depth framebuffer states. |
| 101 | * @param must_reconfigure If true, tells the framebuffer to skip the cache and reconfigure | ||
| 102 | * again. Used by the texture cache to solve texception conflicts | ||
| 98 | * @param use_color_fb If true, configure color framebuffers. | 103 | * @param use_color_fb If true, configure color framebuffers. |
| 99 | * @param using_depth_fb If true, configure the depth/stencil framebuffer. | 104 | * @param using_depth_fb If true, configure the depth/stencil framebuffer. |
| 100 | * @param preserve_contents If true, tries to preserve data from a previously used framebuffer. | 105 | * @param preserve_contents If true, tries to preserve data from a previously used framebuffer. |
| @@ -118,9 +123,10 @@ private: | |||
| 118 | void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 123 | void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 119 | const Shader& shader); | 124 | const Shader& shader); |
| 120 | 125 | ||
| 121 | /// Configures the current textures to use for the draw command. | 126 | /// Configures the current textures to use for the draw command. Returns shaders texture buffer |
| 122 | void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, | 127 | /// usage. |
| 123 | BaseBindings base_bindings); | 128 | TextureBufferUsage SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 129 | const Shader& shader, BaseBindings base_bindings); | ||
| 124 | 130 | ||
| 125 | /// Syncs the viewport and depth range to match the guest state | 131 | /// Syncs the viewport and depth range to match the guest state |
| 126 | void SyncViewport(OpenGLState& current_state); | 132 | void SyncViewport(OpenGLState& current_state); |
| @@ -181,10 +187,11 @@ private: | |||
| 181 | const Device device; | 187 | const Device device; |
| 182 | OpenGLState state; | 188 | OpenGLState state; |
| 183 | 189 | ||
| 184 | RasterizerCacheOpenGL res_cache; | 190 | TextureCacheOpenGL texture_cache; |
| 185 | ShaderCacheOpenGL shader_cache; | 191 | ShaderCacheOpenGL shader_cache; |
| 186 | GlobalRegionCacheOpenGL global_cache; | 192 | GlobalRegionCacheOpenGL global_cache; |
| 187 | SamplerCacheOpenGL sampler_cache; | 193 | SamplerCacheOpenGL sampler_cache; |
| 194 | FramebufferCacheOpenGL framebuffer_cache; | ||
| 188 | 195 | ||
| 189 | Core::System& system; | 196 | Core::System& system; |
| 190 | ScreenInfo& screen_info; | 197 | ScreenInfo& screen_info; |
| @@ -195,7 +202,6 @@ private: | |||
| 195 | OGLVertexArray> | 202 | OGLVertexArray> |
| 196 | vertex_array_cache; | 203 | vertex_array_cache; |
| 197 | 204 | ||
| 198 | std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache; | ||
| 199 | FramebufferConfigState current_framebuffer_config_state; | 205 | FramebufferConfigState current_framebuffer_config_state; |
| 200 | std::pair<bool, bool> current_depth_stencil_usage{}; | 206 | std::pair<bool, bool> current_depth_stencil_usage{}; |
| 201 | 207 | ||
| @@ -218,8 +224,6 @@ private: | |||
| 218 | 224 | ||
| 219 | void SetupShaders(GLenum primitive_mode); | 225 | void SetupShaders(GLenum primitive_mode); |
| 220 | 226 | ||
| 221 | void SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, OpenGLState& current_state); | ||
| 222 | |||
| 223 | enum class AccelDraw { Disabled, Arrays, Indexed }; | 227 | enum class AccelDraw { Disabled, Arrays, Indexed }; |
| 224 | AccelDraw accelerate_draw = AccelDraw::Disabled; | 228 | AccelDraw accelerate_draw = AccelDraw::Disabled; |
| 225 | 229 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp deleted file mode 100644 index a7681902e..000000000 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ /dev/null | |||
| @@ -1,1362 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <optional> | ||
| 7 | #include <glad/glad.h> | ||
| 8 | |||
| 9 | #include "common/alignment.h" | ||
| 10 | #include "common/assert.h" | ||
| 11 | #include "common/logging/log.h" | ||
| 12 | #include "common/microprofile.h" | ||
| 13 | #include "common/scope_exit.h" | ||
| 14 | #include "core/core.h" | ||
| 15 | #include "core/hle/kernel/process.h" | ||
| 16 | #include "core/settings.h" | ||
| 17 | #include "video_core/engines/maxwell_3d.h" | ||
| 18 | #include "video_core/memory_manager.h" | ||
| 19 | #include "video_core/morton.h" | ||
| 20 | #include "video_core/renderer_opengl/gl_rasterizer.h" | ||
| 21 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | ||
| 22 | #include "video_core/renderer_opengl/utils.h" | ||
| 23 | #include "video_core/surface.h" | ||
| 24 | #include "video_core/textures/convert.h" | ||
| 25 | #include "video_core/textures/decoders.h" | ||
| 26 | |||
| 27 | namespace OpenGL { | ||
| 28 | |||
| 29 | using VideoCore::MortonSwizzle; | ||
| 30 | using VideoCore::MortonSwizzleMode; | ||
| 31 | using VideoCore::Surface::ComponentTypeFromDepthFormat; | ||
| 32 | using VideoCore::Surface::ComponentTypeFromRenderTarget; | ||
| 33 | using VideoCore::Surface::ComponentTypeFromTexture; | ||
| 34 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 35 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 36 | using VideoCore::Surface::PixelFormatFromTextureFormat; | ||
| 37 | using VideoCore::Surface::SurfaceTargetFromTextureType; | ||
| 38 | |||
| 39 | struct FormatTuple { | ||
| 40 | GLint internal_format; | ||
| 41 | GLenum format; | ||
| 42 | GLenum type; | ||
| 43 | ComponentType component_type; | ||
| 44 | bool compressed; | ||
| 45 | }; | ||
| 46 | |||
| 47 | static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) { | ||
| 48 | glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | ||
| 49 | glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | ||
| 50 | glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); | ||
| 51 | glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); | ||
| 52 | glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1); | ||
| 53 | if (max_mip_level == 1) { | ||
| 54 | glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0); | ||
| 55 | } | ||
| 56 | } | ||
| 57 | |||
| 58 | void SurfaceParams::InitCacheParameters(GPUVAddr gpu_addr_) { | ||
| 59 | auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; | ||
| 60 | |||
| 61 | gpu_addr = gpu_addr_; | ||
| 62 | host_ptr = memory_manager.GetPointer(gpu_addr_); | ||
| 63 | size_in_bytes = SizeInBytesRaw(); | ||
| 64 | |||
| 65 | if (IsPixelFormatASTC(pixel_format)) { | ||
| 66 | // ASTC is uncompressed in software, in emulated as RGBA8 | ||
| 67 | size_in_bytes_gl = width * height * depth * 4; | ||
| 68 | } else { | ||
| 69 | size_in_bytes_gl = SizeInBytesGL(); | ||
| 70 | } | ||
| 71 | } | ||
| 72 | |||
| 73 | std::size_t SurfaceParams::InnerMipmapMemorySize(u32 mip_level, bool force_gl, bool layer_only, | ||
| 74 | bool uncompressed) const { | ||
| 75 | const u32 tile_x{GetDefaultBlockWidth(pixel_format)}; | ||
| 76 | const u32 tile_y{GetDefaultBlockHeight(pixel_format)}; | ||
| 77 | const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)}; | ||
| 78 | u32 m_depth = (layer_only ? 1U : depth); | ||
| 79 | u32 m_width = MipWidth(mip_level); | ||
| 80 | u32 m_height = MipHeight(mip_level); | ||
| 81 | m_width = uncompressed ? m_width : std::max(1U, (m_width + tile_x - 1) / tile_x); | ||
| 82 | m_height = uncompressed ? m_height : std::max(1U, (m_height + tile_y - 1) / tile_y); | ||
| 83 | m_depth = std::max(1U, m_depth >> mip_level); | ||
| 84 | u32 m_block_height = MipBlockHeight(mip_level); | ||
| 85 | u32 m_block_depth = MipBlockDepth(mip_level); | ||
| 86 | return Tegra::Texture::CalculateSize(force_gl ? false : is_tiled, bytes_per_pixel, m_width, | ||
| 87 | m_height, m_depth, m_block_height, m_block_depth); | ||
| 88 | } | ||
| 89 | |||
| 90 | std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, | ||
| 91 | bool uncompressed) const { | ||
| 92 | std::size_t block_size_bytes = Tegra::Texture::GetGOBSize() * block_height * block_depth; | ||
| 93 | std::size_t size = 0; | ||
| 94 | for (u32 i = 0; i < max_mip_level; i++) { | ||
| 95 | size += InnerMipmapMemorySize(i, force_gl, layer_only, uncompressed); | ||
| 96 | } | ||
| 97 | if (!force_gl && is_tiled) { | ||
| 98 | size = Common::AlignUp(size, block_size_bytes); | ||
| 99 | } | ||
| 100 | return size; | ||
| 101 | } | ||
| 102 | |||
| 103 | /*static*/ SurfaceParams SurfaceParams::CreateForTexture( | ||
| 104 | const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry) { | ||
| 105 | SurfaceParams params{}; | ||
| 106 | params.is_tiled = config.tic.IsTiled(); | ||
| 107 | params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0, | ||
| 108 | params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, | ||
| 109 | params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0, | ||
| 110 | params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1; | ||
| 111 | params.srgb_conversion = config.tic.IsSrgbConversionEnabled(); | ||
| 112 | params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), | ||
| 113 | params.srgb_conversion); | ||
| 114 | |||
| 115 | if (config.tsc.depth_compare_enabled) { | ||
| 116 | // Some titles create a 'R16U' (normalized 16-bit) texture with depth_compare enabled, | ||
| 117 | // then attempt to sample from it via a shadow sampler. Convert format to Z16 (which also | ||
| 118 | // causes GetFormatType to properly return 'Depth' below). | ||
| 119 | if (GetFormatType(params.pixel_format) == SurfaceType::ColorTexture) { | ||
| 120 | switch (params.pixel_format) { | ||
| 121 | case PixelFormat::R16S: | ||
| 122 | case PixelFormat::R16U: | ||
| 123 | case PixelFormat::R16F: | ||
| 124 | params.pixel_format = PixelFormat::Z16; | ||
| 125 | break; | ||
| 126 | case PixelFormat::R32F: | ||
| 127 | params.pixel_format = PixelFormat::Z32F; | ||
| 128 | break; | ||
| 129 | default: | ||
| 130 | LOG_WARNING(HW_GPU, "Color texture format being used with depth compare: {}", | ||
| 131 | static_cast<u32>(params.pixel_format)); | ||
| 132 | break; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | } | ||
| 136 | |||
| 137 | params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); | ||
| 138 | params.type = GetFormatType(params.pixel_format); | ||
| 139 | UNIMPLEMENTED_IF(params.type == SurfaceType::ColorTexture && config.tsc.depth_compare_enabled); | ||
| 140 | |||
| 141 | params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); | ||
| 142 | params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); | ||
| 143 | if (!params.is_tiled) { | ||
| 144 | params.pitch = config.tic.Pitch(); | ||
| 145 | } | ||
| 146 | params.unaligned_height = config.tic.Height(); | ||
| 147 | params.target = SurfaceTargetFromTextureType(config.tic.texture_type); | ||
| 148 | params.identity = SurfaceClass::Uploaded; | ||
| 149 | |||
| 150 | switch (params.target) { | ||
| 151 | case SurfaceTarget::Texture1D: | ||
| 152 | case SurfaceTarget::Texture2D: | ||
| 153 | params.depth = 1; | ||
| 154 | break; | ||
| 155 | case SurfaceTarget::TextureCubemap: | ||
| 156 | params.depth = config.tic.Depth() * 6; | ||
| 157 | break; | ||
| 158 | case SurfaceTarget::Texture3D: | ||
| 159 | params.depth = config.tic.Depth(); | ||
| 160 | break; | ||
| 161 | case SurfaceTarget::Texture2DArray: | ||
| 162 | params.depth = config.tic.Depth(); | ||
| 163 | if (!entry.IsArray()) { | ||
| 164 | // TODO(bunnei): We have seen games re-use a Texture2D as Texture2DArray with depth of | ||
| 165 | // one, but sample the texture in the shader as if it were not an array texture. This | ||
| 166 | // probably is valid on hardware, but we still need to write a test to confirm this. In | ||
| 167 | // emulation, the workaround here is to continue to treat this as a Texture2D. An | ||
| 168 | // example game that does this is Super Mario Odyssey (in Cloud Kingdom). | ||
| 169 | ASSERT(params.depth == 1); | ||
| 170 | params.target = SurfaceTarget::Texture2D; | ||
| 171 | } | ||
| 172 | break; | ||
| 173 | case SurfaceTarget::TextureCubeArray: | ||
| 174 | params.depth = config.tic.Depth() * 6; | ||
| 175 | if (!entry.IsArray()) { | ||
| 176 | ASSERT(params.depth == 6); | ||
| 177 | params.target = SurfaceTarget::TextureCubemap; | ||
| 178 | } | ||
| 179 | break; | ||
| 180 | default: | ||
| 181 | LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target)); | ||
| 182 | UNREACHABLE(); | ||
| 183 | params.depth = 1; | ||
| 184 | break; | ||
| 185 | } | ||
| 186 | |||
| 187 | params.is_layered = SurfaceTargetIsLayered(params.target); | ||
| 188 | params.is_array = SurfaceTargetIsArray(params.target); | ||
| 189 | params.max_mip_level = config.tic.max_mip_level + 1; | ||
| 190 | params.rt = {}; | ||
| 191 | |||
| 192 | params.InitCacheParameters(config.tic.Address()); | ||
| 193 | |||
| 194 | return params; | ||
| 195 | } | ||
| 196 | |||
| 197 | /*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(std::size_t index) { | ||
| 198 | const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]}; | ||
| 199 | SurfaceParams params{}; | ||
| 200 | |||
| 201 | params.is_tiled = | ||
| 202 | config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; | ||
| 203 | params.block_width = 1 << config.memory_layout.block_width; | ||
| 204 | params.block_height = 1 << config.memory_layout.block_height; | ||
| 205 | params.block_depth = 1 << config.memory_layout.block_depth; | ||
| 206 | params.tile_width_spacing = 1; | ||
| 207 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | ||
| 208 | params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || | ||
| 209 | config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; | ||
| 210 | params.component_type = ComponentTypeFromRenderTarget(config.format); | ||
| 211 | params.type = GetFormatType(params.pixel_format); | ||
| 212 | if (params.is_tiled) { | ||
| 213 | params.width = config.width; | ||
| 214 | } else { | ||
| 215 | params.pitch = config.width; | ||
| 216 | const u32 bpp = params.GetFormatBpp() / 8; | ||
| 217 | params.width = params.pitch / bpp; | ||
| 218 | } | ||
| 219 | params.height = config.height; | ||
| 220 | params.unaligned_height = config.height; | ||
| 221 | params.target = SurfaceTarget::Texture2D; | ||
| 222 | params.identity = SurfaceClass::RenderTarget; | ||
| 223 | params.depth = 1; | ||
| 224 | params.max_mip_level = 1; | ||
| 225 | params.is_layered = false; | ||
| 226 | |||
| 227 | // Render target specific parameters, not used for caching | ||
| 228 | params.rt.index = static_cast<u32>(index); | ||
| 229 | params.rt.array_mode = config.array_mode; | ||
| 230 | params.rt.layer_stride = config.layer_stride; | ||
| 231 | params.rt.volume = config.volume; | ||
| 232 | params.rt.base_layer = config.base_layer; | ||
| 233 | |||
| 234 | params.InitCacheParameters(config.Address()); | ||
| 235 | |||
| 236 | return params; | ||
| 237 | } | ||
| 238 | |||
| 239 | /*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer( | ||
| 240 | u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format, | ||
| 241 | u32 block_width, u32 block_height, u32 block_depth, | ||
| 242 | Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) { | ||
| 243 | SurfaceParams params{}; | ||
| 244 | |||
| 245 | params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; | ||
| 246 | params.block_width = 1 << std::min(block_width, 5U); | ||
| 247 | params.block_height = 1 << std::min(block_height, 5U); | ||
| 248 | params.block_depth = 1 << std::min(block_depth, 5U); | ||
| 249 | params.tile_width_spacing = 1; | ||
| 250 | params.pixel_format = PixelFormatFromDepthFormat(format); | ||
| 251 | params.component_type = ComponentTypeFromDepthFormat(format); | ||
| 252 | params.type = GetFormatType(params.pixel_format); | ||
| 253 | params.srgb_conversion = false; | ||
| 254 | params.width = zeta_width; | ||
| 255 | params.height = zeta_height; | ||
| 256 | params.unaligned_height = zeta_height; | ||
| 257 | params.target = SurfaceTarget::Texture2D; | ||
| 258 | params.identity = SurfaceClass::DepthBuffer; | ||
| 259 | params.depth = 1; | ||
| 260 | params.max_mip_level = 1; | ||
| 261 | params.is_layered = false; | ||
| 262 | params.rt = {}; | ||
| 263 | |||
| 264 | params.InitCacheParameters(zeta_address); | ||
| 265 | |||
| 266 | return params; | ||
| 267 | } | ||
| 268 | |||
| 269 | /*static*/ SurfaceParams SurfaceParams::CreateForFermiCopySurface( | ||
| 270 | const Tegra::Engines::Fermi2D::Regs::Surface& config) { | ||
| 271 | SurfaceParams params{}; | ||
| 272 | |||
| 273 | params.is_tiled = !config.linear; | ||
| 274 | params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, | ||
| 275 | params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, | ||
| 276 | params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0, | ||
| 277 | params.tile_width_spacing = 1; | ||
| 278 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | ||
| 279 | params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || | ||
| 280 | config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; | ||
| 281 | params.component_type = ComponentTypeFromRenderTarget(config.format); | ||
| 282 | params.type = GetFormatType(params.pixel_format); | ||
| 283 | params.width = config.width; | ||
| 284 | params.pitch = config.pitch; | ||
| 285 | params.height = config.height; | ||
| 286 | params.unaligned_height = config.height; | ||
| 287 | params.target = SurfaceTarget::Texture2D; | ||
| 288 | params.identity = SurfaceClass::Copy; | ||
| 289 | params.depth = 1; | ||
| 290 | params.max_mip_level = 1; | ||
| 291 | params.rt = {}; | ||
| 292 | |||
| 293 | params.InitCacheParameters(config.Address()); | ||
| 294 | |||
| 295 | return params; | ||
| 296 | } | ||
| 297 | |||
| 298 | static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ | ||
| 299 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U | ||
| 300 | {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S | ||
| 301 | {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI | ||
| 302 | {GL_RGB8, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U | ||
| 303 | {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm, | ||
| 304 | false}, // A2B10G10R10U | ||
| 305 | {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U | ||
| 306 | {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U | ||
| 307 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI | ||
| 308 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F | ||
| 309 | {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U | ||
| 310 | {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI | ||
| 311 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, | ||
| 312 | false}, // R11FG11FB10F | ||
| 313 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI | ||
| 314 | {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 315 | true}, // DXT1 | ||
| 316 | {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 317 | true}, // DXT23 | ||
| 318 | {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 319 | true}, // DXT45 | ||
| 320 | {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1 | ||
| 321 | {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 322 | true}, // DXN2UNORM | ||
| 323 | {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM | ||
| 324 | {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 325 | true}, // BC7U | ||
| 326 | {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float, | ||
| 327 | true}, // BC6H_UF16 | ||
| 328 | {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float, | ||
| 329 | true}, // BC6H_SF16 | ||
| 330 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 | ||
| 331 | {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 | ||
| 332 | {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F | ||
| 333 | {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F | ||
| 334 | {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F | ||
| 335 | {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F | ||
| 336 | {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U | ||
| 337 | {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S | ||
| 338 | {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI | ||
| 339 | {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I | ||
| 340 | {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16 | ||
| 341 | {GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F | ||
| 342 | {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI | ||
| 343 | {GL_RG16I, GL_RG_INTEGER, GL_SHORT, ComponentType::SInt, false}, // RG16I | ||
| 344 | {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S | ||
| 345 | {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F | ||
| 346 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, | ||
| 347 | false}, // RGBA8_SRGB | ||
| 348 | {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U | ||
| 349 | {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S | ||
| 350 | {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI | ||
| 351 | {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI | ||
| 352 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8 | ||
| 353 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5 | ||
| 354 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4 | ||
| 355 | {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 | ||
| 356 | // Compressed sRGB formats | ||
| 357 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 358 | true}, // DXT1_SRGB | ||
| 359 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 360 | true}, // DXT23_SRGB | ||
| 361 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 362 | true}, // DXT45_SRGB | ||
| 363 | {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 364 | true}, // BC7U_SRGB | ||
| 365 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB | ||
| 366 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB | ||
| 367 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB | ||
| 368 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB | ||
| 369 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5 | ||
| 370 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB | ||
| 371 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8 | ||
| 372 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB | ||
| 373 | |||
| 374 | // Depth formats | ||
| 375 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F | ||
| 376 | {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm, | ||
| 377 | false}, // Z16 | ||
| 378 | |||
| 379 | // DepthStencil formats | ||
| 380 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, | ||
| 381 | false}, // Z24S8 | ||
| 382 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, | ||
| 383 | false}, // S8Z24 | ||
| 384 | {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, | ||
| 385 | ComponentType::Float, false}, // Z32FS8 | ||
| 386 | }}; | ||
| 387 | |||
| 388 | static GLenum SurfaceTargetToGL(SurfaceTarget target) { | ||
| 389 | switch (target) { | ||
| 390 | case SurfaceTarget::Texture1D: | ||
| 391 | return GL_TEXTURE_1D; | ||
| 392 | case SurfaceTarget::Texture2D: | ||
| 393 | return GL_TEXTURE_2D; | ||
| 394 | case SurfaceTarget::Texture3D: | ||
| 395 | return GL_TEXTURE_3D; | ||
| 396 | case SurfaceTarget::Texture1DArray: | ||
| 397 | return GL_TEXTURE_1D_ARRAY; | ||
| 398 | case SurfaceTarget::Texture2DArray: | ||
| 399 | return GL_TEXTURE_2D_ARRAY; | ||
| 400 | case SurfaceTarget::TextureCubemap: | ||
| 401 | return GL_TEXTURE_CUBE_MAP; | ||
| 402 | case SurfaceTarget::TextureCubeArray: | ||
| 403 | return GL_TEXTURE_CUBE_MAP_ARRAY; | ||
| 404 | } | ||
| 405 | LOG_CRITICAL(Render_OpenGL, "Unimplemented texture target={}", static_cast<u32>(target)); | ||
| 406 | UNREACHABLE(); | ||
| 407 | return {}; | ||
| 408 | } | ||
| 409 | |||
| 410 | static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { | ||
| 411 | ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); | ||
| 412 | auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)]; | ||
| 413 | ASSERT(component_type == format.component_type); | ||
| 414 | |||
| 415 | return format; | ||
| 416 | } | ||
| 417 | |||
| 418 | /// Returns the discrepant array target | ||
| 419 | constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) { | ||
| 420 | switch (target) { | ||
| 421 | case SurfaceTarget::Texture1D: | ||
| 422 | return GL_TEXTURE_1D_ARRAY; | ||
| 423 | case SurfaceTarget::Texture2D: | ||
| 424 | return GL_TEXTURE_2D_ARRAY; | ||
| 425 | case SurfaceTarget::Texture3D: | ||
| 426 | return GL_NONE; | ||
| 427 | case SurfaceTarget::Texture1DArray: | ||
| 428 | return GL_TEXTURE_1D; | ||
| 429 | case SurfaceTarget::Texture2DArray: | ||
| 430 | return GL_TEXTURE_2D; | ||
| 431 | case SurfaceTarget::TextureCubemap: | ||
| 432 | return GL_TEXTURE_CUBE_MAP_ARRAY; | ||
| 433 | case SurfaceTarget::TextureCubeArray: | ||
| 434 | return GL_TEXTURE_CUBE_MAP; | ||
| 435 | } | ||
| 436 | return GL_NONE; | ||
| 437 | } | ||
| 438 | |||
| 439 | Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { | ||
| 440 | u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; | ||
| 441 | if (IsPixelFormatASTC(pixel_format)) { | ||
| 442 | // ASTC formats must stop at the ATSC block size boundary | ||
| 443 | actual_height = Common::AlignDown(actual_height, GetASTCBlockSize(pixel_format).second); | ||
| 444 | } | ||
| 445 | return {0, actual_height, MipWidth(mip_level), 0}; | ||
| 446 | } | ||
| 447 | |||
| 448 | void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params, | ||
| 449 | std::vector<u8>& gl_buffer, u32 mip_level) { | ||
| 450 | u32 depth = params.MipDepth(mip_level); | ||
| 451 | if (params.target == SurfaceTarget::Texture2D) { | ||
| 452 | // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented. | ||
| 453 | depth = 1U; | ||
| 454 | } | ||
| 455 | if (params.is_layered) { | ||
| 456 | u64 offset = params.GetMipmapLevelOffset(mip_level); | ||
| 457 | u64 offset_gl = 0; | ||
| 458 | const u64 layer_size = params.LayerMemorySize(); | ||
| 459 | const u64 gl_size = params.LayerSizeGL(mip_level); | ||
| 460 | for (u32 i = 0; i < params.depth; i++) { | ||
| 461 | MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), | ||
| 462 | params.MipBlockHeight(mip_level), params.MipHeight(mip_level), | ||
| 463 | params.MipBlockDepth(mip_level), 1, params.tile_width_spacing, | ||
| 464 | gl_buffer.data() + offset_gl, params.host_ptr + offset); | ||
| 465 | offset += layer_size; | ||
| 466 | offset_gl += gl_size; | ||
| 467 | } | ||
| 468 | } else { | ||
| 469 | const u64 offset = params.GetMipmapLevelOffset(mip_level); | ||
| 470 | MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), | ||
| 471 | params.MipBlockHeight(mip_level), params.MipHeight(mip_level), | ||
| 472 | params.MipBlockDepth(mip_level), depth, params.tile_width_spacing, | ||
| 473 | gl_buffer.data(), params.host_ptr + offset); | ||
| 474 | } | ||
| 475 | } | ||
| 476 | |||
| 477 | void RasterizerCacheOpenGL::FastCopySurface(const Surface& src_surface, | ||
| 478 | const Surface& dst_surface) { | ||
| 479 | const auto& src_params{src_surface->GetSurfaceParams()}; | ||
| 480 | const auto& dst_params{dst_surface->GetSurfaceParams()}; | ||
| 481 | |||
| 482 | const u32 width{std::min(src_params.width, dst_params.width)}; | ||
| 483 | const u32 height{std::min(src_params.height, dst_params.height)}; | ||
| 484 | |||
| 485 | glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0, | ||
| 486 | 0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0, | ||
| 487 | 0, 0, width, height, 1); | ||
| 488 | |||
| 489 | dst_surface->MarkAsModified(true, *this); | ||
| 490 | } | ||
| 491 | |||
MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64));
// Copies src_surface into dst_surface through a pixel buffer object, reinterpreting the
// raw data into the destination's format. If the destination is larger than the source,
// the remainder is filled from guest memory. Only dst_attachment == 0 is implemented.
void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface,
                                        const GLuint copy_pbo_handle, const GLenum src_attachment,
                                        const GLenum dst_attachment,
                                        const std::size_t cubemap_face) {
    MICROPROFILE_SCOPE(OpenGL_CopySurface);
    ASSERT_MSG(dst_attachment == 0, "Unimplemented");

    const auto& src_params{src_surface->GetSurfaceParams()};
    const auto& dst_params{dst_surface->GetSurfaceParams()};

    const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
    const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);

    // The PBO must be able to hold whichever of the two surfaces is larger.
    const std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes);

    // Download the source texture into the PBO.
    glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
    glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_COPY);
    if (source_format.compressed) {
        glGetCompressedTextureImage(src_surface->Texture().handle, src_attachment,
                                    static_cast<GLsizei>(src_params.size_in_bytes), nullptr);
    } else {
        glGetTextureImage(src_surface->Texture().handle, src_attachment, source_format.format,
                          source_format.type, static_cast<GLsizei>(src_params.size_in_bytes),
                          nullptr);
    }
    // If the new texture is bigger than the previous one, we need to fill in the rest with data
    // from the CPU.
    if (src_params.size_in_bytes < dst_params.size_in_bytes) {
        // Upload the rest of the memory.
        if (dst_params.is_tiled) {
            // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest
            // of the data in this case. Games like Super Mario Odyssey seem to hit this case
            // when drawing, it re-uses the memory of a previous texture as a bigger framebuffer
            // but it doesn't clear it beforehand, the texture is already full of zeros.
            LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
                              "reinterpretation but the texture is tiled.");
        }
        const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
        auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
        glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
                        memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes));
    }

    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

    // Upload only the region common to both surfaces into the destination texture.
    const GLsizei width{static_cast<GLsizei>(
        std::min(src_params.GetRect().GetWidth(), dst_params.GetRect().GetWidth()))};
    const GLsizei height{static_cast<GLsizei>(
        std::min(src_params.GetRect().GetHeight(), dst_params.GetRect().GetHeight()))};

    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);
    if (dest_format.compressed) {
        LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!");
        UNREACHABLE();
    } else {
        switch (dst_params.target) {
        case SurfaceTarget::Texture1D:
            glTextureSubImage1D(dst_surface->Texture().handle, 0, 0, width, dest_format.format,
                                dest_format.type, nullptr);
            break;
        case SurfaceTarget::Texture2D:
            glTextureSubImage2D(dst_surface->Texture().handle, 0, 0, 0, width, height,
                                dest_format.format, dest_format.type, nullptr);
            break;
        case SurfaceTarget::Texture3D:
        case SurfaceTarget::Texture2DArray:
        case SurfaceTarget::TextureCubeArray:
            glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, 0, width, height,
                                static_cast<GLsizei>(dst_params.depth), dest_format.format,
                                dest_format.type, nullptr);
            break;
        case SurfaceTarget::TextureCubemap:
            // Upload into the single cubemap face selected by the caller.
            glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0,
                                static_cast<GLint>(cubemap_face), width, height, 1,
                                dest_format.format, dest_format.type, nullptr);
            break;
        default:
            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                         static_cast<u32>(dst_params.target));
            UNREACHABLE();
        }
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
    }

    dst_surface->MarkAsModified(true, *this);
}
| 579 | |||
| 580 | CachedSurface::CachedSurface(const SurfaceParams& params) | ||
| 581 | : RasterizerCacheObject{params.host_ptr}, params{params}, | ||
| 582 | gl_target{SurfaceTargetToGL(params.target)}, cached_size_in_bytes{params.size_in_bytes} { | ||
| 583 | |||
| 584 | const auto optional_cpu_addr{ | ||
| 585 | Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(params.gpu_addr)}; | ||
| 586 | ASSERT_MSG(optional_cpu_addr, "optional_cpu_addr is invalid"); | ||
| 587 | cpu_addr = *optional_cpu_addr; | ||
| 588 | |||
| 589 | texture.Create(gl_target); | ||
| 590 | |||
| 591 | // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0) | ||
| 592 | // alternatives. This signals a bug on those functions. | ||
| 593 | const auto width = static_cast<GLsizei>(params.MipWidth(0)); | ||
| 594 | const auto height = static_cast<GLsizei>(params.MipHeight(0)); | ||
| 595 | memory_size = params.MemorySize(); | ||
| 596 | reinterpreted = false; | ||
| 597 | |||
| 598 | const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type); | ||
| 599 | gl_internal_format = format_tuple.internal_format; | ||
| 600 | |||
| 601 | switch (params.target) { | ||
| 602 | case SurfaceTarget::Texture1D: | ||
| 603 | glTextureStorage1D(texture.handle, params.max_mip_level, format_tuple.internal_format, | ||
| 604 | width); | ||
| 605 | break; | ||
| 606 | case SurfaceTarget::Texture2D: | ||
| 607 | case SurfaceTarget::TextureCubemap: | ||
| 608 | glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format, | ||
| 609 | width, height); | ||
| 610 | break; | ||
| 611 | case SurfaceTarget::Texture3D: | ||
| 612 | case SurfaceTarget::Texture2DArray: | ||
| 613 | case SurfaceTarget::TextureCubeArray: | ||
| 614 | glTextureStorage3D(texture.handle, params.max_mip_level, format_tuple.internal_format, | ||
| 615 | width, height, params.depth); | ||
| 616 | break; | ||
| 617 | default: | ||
| 618 | LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", | ||
| 619 | static_cast<u32>(params.target)); | ||
| 620 | UNREACHABLE(); | ||
| 621 | glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format, | ||
| 622 | width, height); | ||
| 623 | } | ||
| 624 | |||
| 625 | ApplyTextureDefaults(texture.handle, params.max_mip_level); | ||
| 626 | |||
| 627 | OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString()); | ||
| 628 | } | ||
| 629 | |||
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
// Reads the surface's guest memory into the temporary per-mipmap linear buffers,
// de-swizzling tiled textures and converting formats (e.g. ASTC to RGBA8) so the data is
// ready for upload to the host GPU.
void CachedSurface::LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
    MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
    // Grow the reusable scratch buffers so there is one, large enough, per mipmap level.
    if (gl_buffer.size() < params.max_mip_level)
        gl_buffer.resize(params.max_mip_level);
    for (u32 i = 0; i < params.max_mip_level; i++)
        gl_buffer[i].resize(params.GetMipmapSizeGL(i));
    if (params.is_tiled) {
        ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
                   params.block_width, static_cast<u32>(params.target));
        // De-swizzle every mipmap level from the guest's block-linear layout.
        for (u32 i = 0; i < params.max_mip_level; i++)
            SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i);
    } else {
        // Linear (pitch) texture: row size in bytes, rounded up to whole format blocks.
        const u32 bpp = params.GetFormatBpp() / 8;
        const u32 copy_size = (params.width * bpp + GetDefaultBlockWidth(params.pixel_format) - 1) /
                              GetDefaultBlockWidth(params.pixel_format);
        if (params.pitch == copy_size) {
            // Rows are contiguous; a single bulk copy suffices.
            std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl);
        } else {
            // Rows are padded to the pitch; copy row by row (height counted in block rows).
            const u32 height = (params.height + GetDefaultBlockHeight(params.pixel_format) - 1) /
                               GetDefaultBlockHeight(params.pixel_format);
            const u8* start{params.host_ptr};
            u8* write_to = gl_buffer[0].data();
            for (u32 h = height; h > 0; h--) {
                std::memcpy(write_to, start, copy_size);
                start += params.pitch;
                write_to += copy_size;
            }
        }
    }
    // Convert each level from the guest format into a host-consumable one in place.
    for (u32 i = 0; i < params.max_mip_level; i++) {
        const u32 width = params.MipWidth(i);
        const u32 height = params.MipHeight(i);
        const u32 depth = params.MipDepth(i);
        if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) {
            // Reserve size for RGBA8 conversion
            constexpr std::size_t rgba_bpp = 4;
            gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp));
        }
        Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width,
                                               height, depth, true, true);
    }
}
| 674 | |||
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
// Downloads the host texture back into guest memory, converting to the guest format and
// re-swizzling tiled textures. Only mipmap level 0 is flushed; ASTC is unsupported.
void CachedSurface::FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
    MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);

    ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented");

    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
    // OpenGL temporary buffer needs to be big enough to store raw texture size
    gl_buffer[0].resize(GetSizeInBytes());

    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
    // Clamp the row alignment into the range glPixelStorei accepts (1..8).
    const u32 align = std::clamp(params.RowAlign(0), 1U, 8U);
    glPixelStorei(GL_PACK_ALIGNMENT, align);
    glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
    ASSERT(!tuple.compressed);
    // Unbind any pack PBO so glGetTextureImage writes into client memory.
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
    glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
                      static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
    glPixelStorei(GL_PACK_ROW_LENGTH, 0);
    Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
                                           params.height, params.depth, true, true);
    if (params.is_tiled) {
        ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
                   params.block_width, static_cast<u32>(params.target));

        // Re-swizzle the linear data back into the guest's block-linear layout.
        SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0);
    } else {
        // Linear (pitch) texture: write back row by row unless the pitch matches exactly.
        const u32 bpp = params.GetFormatBpp() / 8;
        const u32 copy_size = params.width * bpp;
        if (params.pitch == copy_size) {
            std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes());
        } else {
            u8* start{params.host_ptr};
            const u8* read_to = gl_buffer[0].data();
            for (u32 h = params.height; h > 0; h--) {
                std::memcpy(start, read_to, copy_size);
                start += params.pitch;
                read_to += copy_size;
            }
        }
    }
}
| 717 | |||
// Uploads a single mipmap level from the temporary linear buffer into the host texture,
// dispatching to the glTextureSubImage*/glCompressedTextureSubImage* variant that matches
// the surface target. read_fb_handle/draw_fb_handle are accepted but not used here.
void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
                                          GLuint read_fb_handle, GLuint draw_fb_handle) {
    const auto& rect{params.GetRect(mip_map)};

    auto& gl_buffer = res_cache_tmp_mem.gl_buffer;

    // Load data from memory to the surface
    const auto x0 = static_cast<GLint>(rect.left);
    const auto y0 = static_cast<GLint>(rect.bottom);
    // Byte offset of the upload origin inside the linear mipmap buffer.
    auto buffer_offset =
        static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) +
                                 static_cast<std::size_t>(x0)) *
        GetBytesPerPixel(params.pixel_format);

    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);

    // Clamp the row alignment into the range glPixelStorei accepts (1..8).
    const u32 align = std::clamp(params.RowAlign(mip_map), 1U, 8U);
    glPixelStorei(GL_UNPACK_ALIGNMENT, align);
    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));

    const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
    if (tuple.compressed) {
        switch (params.target) {
        case SurfaceTarget::Texture2D:
            glCompressedTextureSubImage2D(
                texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
                static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format, image_size,
                &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture3D:
            glCompressedTextureSubImage3D(
                texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
                static_cast<GLsizei>(params.MipHeight(mip_map)),
                static_cast<GLsizei>(params.MipDepth(mip_map)), tuple.internal_format, image_size,
                &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture2DArray:
        case SurfaceTarget::TextureCubeArray:
            glCompressedTextureSubImage3D(
                texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
                static_cast<GLsizei>(params.MipHeight(mip_map)), static_cast<GLsizei>(params.depth),
                tuple.internal_format, image_size, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::TextureCubemap: {
            // Upload each cubemap face individually, advancing through the linear buffer.
            const auto layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
            for (std::size_t face = 0; face < params.depth; ++face) {
                glCompressedTextureSubImage3D(
                    texture.handle, mip_map, 0, 0, static_cast<GLint>(face),
                    static_cast<GLsizei>(params.MipWidth(mip_map)),
                    static_cast<GLsizei>(params.MipHeight(mip_map)), 1, tuple.internal_format,
                    layer_size, &gl_buffer[mip_map][buffer_offset]);
                buffer_offset += layer_size;
            }
            break;
        }
        default:
            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                         static_cast<u32>(params.target));
            UNREACHABLE();
            // Fall back to a 2D upload so the call sequence stays consistent.
            glCompressedTextureSubImage2D(
                texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
                static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format,
                static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[mip_map][buffer_offset]);
        }
    } else {
        switch (params.target) {
        case SurfaceTarget::Texture1D:
            glTextureSubImage1D(texture.handle, mip_map, x0, static_cast<GLsizei>(rect.GetWidth()),
                                tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture2D:
            glTextureSubImage2D(texture.handle, mip_map, x0, y0,
                                static_cast<GLsizei>(rect.GetWidth()),
                                static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
                                &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture3D:
            glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
                                static_cast<GLsizei>(rect.GetWidth()),
                                static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
                                tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::Texture2DArray:
        case SurfaceTarget::TextureCubeArray:
            glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
                                static_cast<GLsizei>(rect.GetWidth()),
                                static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
                                tuple.type, &gl_buffer[mip_map][buffer_offset]);
            break;
        case SurfaceTarget::TextureCubemap: {
            // Upload each cubemap face individually, advancing through the linear buffer.
            for (std::size_t face = 0; face < params.depth; ++face) {
                glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
                                    static_cast<GLsizei>(rect.GetWidth()),
                                    static_cast<GLsizei>(rect.GetHeight()), 1, tuple.format,
                                    tuple.type, &gl_buffer[mip_map][buffer_offset]);
                buffer_offset += params.LayerSizeGL(mip_map);
            }
            break;
        }
        default:
            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                         static_cast<u32>(params.target));
            UNREACHABLE();
            // Fall back to a 2D upload so the call sequence stays consistent.
            glTextureSubImage2D(texture.handle, mip_map, x0, y0,
                                static_cast<GLsizei>(rect.GetWidth()),
                                static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
                                &gl_buffer[mip_map][buffer_offset]);
        }
    }

    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
| 830 | |||
| 831 | void CachedSurface::EnsureTextureDiscrepantView() { | ||
| 832 | if (discrepant_view.handle != 0) | ||
| 833 | return; | ||
| 834 | |||
| 835 | const GLenum target{GetArrayDiscrepantTarget(params.target)}; | ||
| 836 | ASSERT(target != GL_NONE); | ||
| 837 | |||
| 838 | const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; | ||
| 839 | constexpr GLuint min_layer = 0; | ||
| 840 | constexpr GLuint min_level = 0; | ||
| 841 | |||
| 842 | glGenTextures(1, &discrepant_view.handle); | ||
| 843 | glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level, | ||
| 844 | params.max_mip_level, min_layer, num_layers); | ||
| 845 | ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level); | ||
| 846 | glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, | ||
| 847 | reinterpret_cast<const GLint*>(swizzle.data())); | ||
| 848 | } | ||
| 849 | |||
| 850 | MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64)); | ||
| 851 | void CachedSurface::UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, | ||
| 852 | GLuint read_fb_handle, GLuint draw_fb_handle) { | ||
| 853 | MICROPROFILE_SCOPE(OpenGL_TextureUL); | ||
| 854 | |||
| 855 | for (u32 i = 0; i < params.max_mip_level; i++) | ||
| 856 | UploadGLMipmapTexture(res_cache_tmp_mem, i, read_fb_handle, draw_fb_handle); | ||
| 857 | } | ||
| 858 | |||
| 859 | void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x, | ||
| 860 | Tegra::Texture::SwizzleSource swizzle_y, | ||
| 861 | Tegra::Texture::SwizzleSource swizzle_z, | ||
| 862 | Tegra::Texture::SwizzleSource swizzle_w) { | ||
| 863 | const GLenum new_x = MaxwellToGL::SwizzleSource(swizzle_x); | ||
| 864 | const GLenum new_y = MaxwellToGL::SwizzleSource(swizzle_y); | ||
| 865 | const GLenum new_z = MaxwellToGL::SwizzleSource(swizzle_z); | ||
| 866 | const GLenum new_w = MaxwellToGL::SwizzleSource(swizzle_w); | ||
| 867 | if (swizzle[0] == new_x && swizzle[1] == new_y && swizzle[2] == new_z && swizzle[3] == new_w) { | ||
| 868 | return; | ||
| 869 | } | ||
| 870 | swizzle = {new_x, new_y, new_z, new_w}; | ||
| 871 | const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data()); | ||
| 872 | glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); | ||
| 873 | if (discrepant_view.handle != 0) { | ||
| 874 | glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); | ||
| 875 | } | ||
| 876 | } | ||
| 877 | |||
| 878 | RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer) | ||
| 879 | : RasterizerCache{rasterizer} { | ||
| 880 | read_framebuffer.Create(); | ||
| 881 | draw_framebuffer.Create(); | ||
| 882 | copy_pbo.Create(); | ||
| 883 | } | ||
| 884 | |||
| 885 | Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, | ||
| 886 | const GLShader::SamplerEntry& entry) { | ||
| 887 | return GetSurface(SurfaceParams::CreateForTexture(config, entry)); | ||
| 888 | } | ||
| 889 | |||
| 890 | Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) { | ||
| 891 | auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; | ||
| 892 | const auto& regs{gpu.regs}; | ||
| 893 | |||
| 894 | if (!gpu.dirty_flags.zeta_buffer) { | ||
| 895 | return last_depth_buffer; | ||
| 896 | } | ||
| 897 | gpu.dirty_flags.zeta_buffer = false; | ||
| 898 | |||
| 899 | if (!regs.zeta.Address() || !regs.zeta_enable) { | ||
| 900 | return last_depth_buffer = {}; | ||
| 901 | } | ||
| 902 | |||
| 903 | SurfaceParams depth_params{SurfaceParams::CreateForDepthBuffer( | ||
| 904 | regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format, | ||
| 905 | regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, | ||
| 906 | regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; | ||
| 907 | |||
| 908 | return last_depth_buffer = GetSurface(depth_params, preserve_contents); | ||
| 909 | } | ||
| 910 | |||
| 911 | Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool preserve_contents) { | ||
| 912 | auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; | ||
| 913 | const auto& regs{gpu.regs}; | ||
| 914 | |||
| 915 | if (!gpu.dirty_flags.color_buffer[index]) { | ||
| 916 | return current_color_buffers[index]; | ||
| 917 | } | ||
| 918 | gpu.dirty_flags.color_buffer.reset(index); | ||
| 919 | |||
| 920 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | ||
| 921 | |||
| 922 | if (index >= regs.rt_control.count) { | ||
| 923 | return current_color_buffers[index] = {}; | ||
| 924 | } | ||
| 925 | |||
| 926 | if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { | ||
| 927 | return current_color_buffers[index] = {}; | ||
| 928 | } | ||
| 929 | |||
| 930 | const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)}; | ||
| 931 | |||
| 932 | return current_color_buffers[index] = GetSurface(color_params, preserve_contents); | ||
| 933 | } | ||
| 934 | |||
// Fills a surface from guest memory and uploads it to the host texture, then clears its
// modified/reload flags so it is considered synchronized.
void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
    surface->LoadGLBuffer(temporal_memory);
    surface->UploadGLTexture(temporal_memory, read_framebuffer.handle, draw_framebuffer.handle);
    surface->MarkAsModified(false, *this);
    surface->MarkForReload(false);
}
| 941 | |||
// Returns a surface matching the given parameters, reusing, recreating or creating cache
// entries as needed. When preserve_contents is true, existing data is carried over (or
// loaded from guest memory for newly created surfaces).
Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
    if (!params.IsValid()) {
        return {};
    }

    // Look up surface in the cache based on address
    Surface surface{TryGet(params.host_ptr)};
    if (surface) {
        if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
            // Use the cached surface as-is unless it's not synced with memory
            if (surface->MustReload())
                LoadSurface(surface);
            return surface;
        } else if (preserve_contents) {
            // If surface parameters changed and we care about keeping the previous data, recreate
            // the surface from the old one
            Surface new_surface{RecreateSurface(surface, params)};
            Unregister(surface);
            Register(new_surface);
            if (new_surface->IsUploaded()) {
                RegisterReinterpretSurface(new_surface);
            }
            return new_surface;
        } else {
            // Delete the old surface before creating a new one to prevent collisions.
            Unregister(surface);
        }
    }

    // No cached surface found - get a new one
    surface = GetUncachedSurface(params);
    Register(surface);

    // Only load surface from memory if we care about the contents
    if (preserve_contents) {
        LoadSurface(surface);
    }

    return surface;
}
| 982 | |||
| 983 | Surface RasterizerCacheOpenGL::GetUncachedSurface(const SurfaceParams& params) { | ||
| 984 | Surface surface{TryGetReservedSurface(params)}; | ||
| 985 | if (!surface) { | ||
| 986 | // No reserved surface available, create a new one and reserve it | ||
| 987 | surface = std::make_shared<CachedSurface>(params); | ||
| 988 | ReserveSurface(surface); | ||
| 989 | } | ||
| 990 | return surface; | ||
| 991 | } | ||
| 992 | |||
// Rebuilds a layered destination surface (e.g. cubemap or 2D array) by copying each
// layer/mipmap from whatever individual cached surfaces back its guest memory addresses.
void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
                                                   const Surface& dst_surface) {
    const auto& init_params{src_surface->GetSurfaceParams()};
    const auto& dst_params{dst_surface->GetSurfaceParams()};
    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
    GPUVAddr address{init_params.gpu_addr};
    const std::size_t layer_size{dst_params.LayerMemorySize()};
    for (u32 layer = 0; layer < dst_params.depth; layer++) {
        for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
            // Look for a cached surface whose data lives at this layer/mipmap's address.
            const GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
            const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))};
            if (!copy) {
                continue;
            }
            const auto& src_params{copy->GetSurfaceParams()};
            // Copy only the region common to both surfaces.
            const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))};
            const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))};

            glCopyImageSubData(copy->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0,
                               0, 0, dst_surface->Texture().handle,
                               SurfaceTargetToGL(dst_params.target), mipmap, 0, 0, layer, width,
                               height, 1);
        }
        address += layer_size;
    }

    dst_surface->MarkAsModified(true, *this);
}
| 1021 | |||
| 1022 | static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, | ||
| 1023 | const Common::Rectangle<u32>& src_rect, | ||
| 1024 | const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle, | ||
| 1025 | GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0, | ||
| 1026 | std::size_t cubemap_face = 0) { | ||
| 1027 | |||
| 1028 | const auto& src_params{src_surface->GetSurfaceParams()}; | ||
| 1029 | const auto& dst_params{dst_surface->GetSurfaceParams()}; | ||
| 1030 | |||
| 1031 | OpenGLState prev_state{OpenGLState::GetCurState()}; | ||
| 1032 | SCOPE_EXIT({ prev_state.Apply(); }); | ||
| 1033 | |||
| 1034 | OpenGLState state; | ||
| 1035 | state.draw.read_framebuffer = read_fb_handle; | ||
| 1036 | state.draw.draw_framebuffer = draw_fb_handle; | ||
| 1037 | state.Apply(); | ||
| 1038 | |||
| 1039 | u32 buffers{}; | ||
| 1040 | |||
| 1041 | if (src_params.type == SurfaceType::ColorTexture) { | ||
| 1042 | switch (src_params.target) { | ||
| 1043 | case SurfaceTarget::Texture2D: | ||
| 1044 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||
| 1045 | GL_TEXTURE_2D, src_surface->Texture().handle, 0); | ||
| 1046 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||
| 1047 | 0, 0); | ||
| 1048 | break; | ||
| 1049 | case SurfaceTarget::TextureCubemap: | ||
| 1050 | glFramebufferTexture2D( | ||
| 1051 | GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||
| 1052 | static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), | ||
| 1053 | src_surface->Texture().handle, 0); | ||
| 1054 | glFramebufferTexture2D( | ||
| 1055 | GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, | ||
| 1056 | static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); | ||
| 1057 | break; | ||
| 1058 | case SurfaceTarget::Texture2DArray: | ||
| 1059 | glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||
| 1060 | src_surface->Texture().handle, 0, 0); | ||
| 1061 | glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); | ||
| 1062 | break; | ||
| 1063 | case SurfaceTarget::Texture3D: | ||
| 1064 | glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||
| 1065 | SurfaceTargetToGL(src_params.target), | ||
| 1066 | src_surface->Texture().handle, 0, 0); | ||
| 1067 | glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, | ||
| 1068 | SurfaceTargetToGL(src_params.target), 0, 0, 0); | ||
| 1069 | break; | ||
| 1070 | default: | ||
| 1071 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||
| 1072 | GL_TEXTURE_2D, src_surface->Texture().handle, 0); | ||
| 1073 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||
| 1074 | 0, 0); | ||
| 1075 | break; | ||
| 1076 | } | ||
| 1077 | |||
| 1078 | switch (dst_params.target) { | ||
| 1079 | case SurfaceTarget::Texture2D: | ||
| 1080 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||
| 1081 | GL_TEXTURE_2D, dst_surface->Texture().handle, 0); | ||
| 1082 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||
| 1083 | 0, 0); | ||
| 1084 | break; | ||
| 1085 | case SurfaceTarget::TextureCubemap: | ||
| 1086 | glFramebufferTexture2D( | ||
| 1087 | GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||
| 1088 | static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), | ||
| 1089 | dst_surface->Texture().handle, 0); | ||
| 1090 | glFramebufferTexture2D( | ||
| 1091 | GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, | ||
| 1092 | static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); | ||
| 1093 | break; | ||
| 1094 | case SurfaceTarget::Texture2DArray: | ||
| 1095 | glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||
| 1096 | dst_surface->Texture().handle, 0, 0); | ||
| 1097 | glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); | ||
| 1098 | break; | ||
| 1099 | |||
| 1100 | case SurfaceTarget::Texture3D: | ||
| 1101 | glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||
| 1102 | SurfaceTargetToGL(dst_params.target), | ||
| 1103 | dst_surface->Texture().handle, 0, 0); | ||
| 1104 | glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, | ||
| 1105 | SurfaceTargetToGL(dst_params.target), 0, 0, 0); | ||
| 1106 | break; | ||
| 1107 | default: | ||
| 1108 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||
| 1109 | GL_TEXTURE_2D, dst_surface->Texture().handle, 0); | ||
| 1110 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||
| 1111 | 0, 0); | ||
| 1112 | break; | ||
| 1113 | } | ||
| 1114 | |||
| 1115 | buffers = GL_COLOR_BUFFER_BIT; | ||
| 1116 | } else if (src_params.type == SurfaceType::Depth) { | ||
| 1117 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||
| 1118 | GL_TEXTURE_2D, 0, 0); | ||
| 1119 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, | ||
| 1120 | src_surface->Texture().handle, 0); | ||
| 1121 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 1122 | |||
| 1123 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||
| 1124 | GL_TEXTURE_2D, 0, 0); | ||
| 1125 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, | ||
| 1126 | dst_surface->Texture().handle, 0); | ||
| 1127 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 1128 | |||
| 1129 | buffers = GL_DEPTH_BUFFER_BIT; | ||
| 1130 | } else if (src_params.type == SurfaceType::DepthStencil) { | ||
| 1131 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, | ||
| 1132 | GL_TEXTURE_2D, 0, 0); | ||
| 1133 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||
| 1134 | src_surface->Texture().handle, 0); | ||
| 1135 | |||
| 1136 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, | ||
| 1137 | GL_TEXTURE_2D, 0, 0); | ||
| 1138 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, | ||
| 1139 | dst_surface->Texture().handle, 0); | ||
| 1140 | |||
| 1141 | buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; | ||
| 1142 | } | ||
| 1143 | |||
| 1144 | glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, | ||
| 1145 | dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, | ||
| 1146 | buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); | ||
| 1147 | |||
| 1148 | return true; | ||
| 1149 | } | ||
| 1150 | |||
| 1151 | void RasterizerCacheOpenGL::FermiCopySurface( | ||
| 1152 | const Tegra::Engines::Fermi2D::Regs::Surface& src_config, | ||
| 1153 | const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, | ||
| 1154 | const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) { | ||
| 1155 | |||
| 1156 | const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config); | ||
| 1157 | const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); | ||
| 1158 | |||
| 1159 | ASSERT(src_params.pixel_format == dst_params.pixel_format); | ||
| 1160 | ASSERT(src_params.block_height == dst_params.block_height); | ||
| 1161 | ASSERT(src_params.is_tiled == dst_params.is_tiled); | ||
| 1162 | ASSERT(src_params.depth == dst_params.depth); | ||
| 1163 | ASSERT(src_params.target == dst_params.target); | ||
| 1164 | ASSERT(src_params.rt.index == dst_params.rt.index); | ||
| 1165 | |||
| 1166 | auto src_surface = GetSurface(src_params, true); | ||
| 1167 | auto dst_surface = GetSurface(dst_params, true); | ||
| 1168 | |||
| 1169 | BlitSurface(src_surface, dst_surface, src_rect, dst_rect, read_framebuffer.handle, | ||
| 1170 | draw_framebuffer.handle); | ||
| 1171 | |||
| 1172 | dst_surface->MarkAsModified(true, *this); | ||
| 1173 | } | ||
| 1174 | |||
| 1175 | void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface, | ||
| 1176 | const Surface& dst_surface) { | ||
| 1177 | const auto& src_params{src_surface->GetSurfaceParams()}; | ||
| 1178 | const auto& dst_params{dst_surface->GetSurfaceParams()}; | ||
| 1179 | |||
| 1180 | // Flush enough memory for both the source and destination surface | ||
| 1181 | FlushRegion(ToCacheAddr(src_params.host_ptr), | ||
| 1182 | std::max(src_params.MemorySize(), dst_params.MemorySize())); | ||
| 1183 | |||
| 1184 | LoadSurface(dst_surface); | ||
| 1185 | } | ||
| 1186 | |||
| 1187 | Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, | ||
| 1188 | const SurfaceParams& new_params) { | ||
| 1189 | // Verify surface is compatible for blitting | ||
| 1190 | auto old_params{old_surface->GetSurfaceParams()}; | ||
| 1191 | |||
| 1192 | // Get a new surface with the new parameters, and blit the previous surface to it | ||
| 1193 | Surface new_surface{GetUncachedSurface(new_params)}; | ||
| 1194 | |||
| 1195 | // With use_accurate_gpu_emulation enabled, do an accurate surface copy | ||
| 1196 | if (Settings::values.use_accurate_gpu_emulation) { | ||
| 1197 | AccurateCopySurface(old_surface, new_surface); | ||
| 1198 | return new_surface; | ||
| 1199 | } | ||
| 1200 | |||
| 1201 | const bool old_compressed = | ||
| 1202 | GetFormatTuple(old_params.pixel_format, old_params.component_type).compressed; | ||
| 1203 | const bool new_compressed = | ||
| 1204 | GetFormatTuple(new_params.pixel_format, new_params.component_type).compressed; | ||
| 1205 | const bool compatible_formats = | ||
| 1206 | GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format) && | ||
| 1207 | !(old_compressed || new_compressed); | ||
| 1208 | // For compatible surfaces, we can just do fast glCopyImageSubData based copy | ||
| 1209 | if (old_params.target == new_params.target && old_params.depth == new_params.depth && | ||
| 1210 | old_params.depth == 1 && compatible_formats) { | ||
| 1211 | FastCopySurface(old_surface, new_surface); | ||
| 1212 | return new_surface; | ||
| 1213 | } | ||
| 1214 | |||
| 1215 | switch (new_params.target) { | ||
| 1216 | case SurfaceTarget::Texture2D: | ||
| 1217 | CopySurface(old_surface, new_surface, copy_pbo.handle); | ||
| 1218 | break; | ||
| 1219 | case SurfaceTarget::Texture3D: | ||
| 1220 | AccurateCopySurface(old_surface, new_surface); | ||
| 1221 | break; | ||
| 1222 | case SurfaceTarget::TextureCubemap: | ||
| 1223 | case SurfaceTarget::Texture2DArray: | ||
| 1224 | case SurfaceTarget::TextureCubeArray: | ||
| 1225 | if (compatible_formats) | ||
| 1226 | FastLayeredCopySurface(old_surface, new_surface); | ||
| 1227 | else { | ||
| 1228 | AccurateCopySurface(old_surface, new_surface); | ||
| 1229 | } | ||
| 1230 | break; | ||
| 1231 | default: | ||
| 1232 | LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", | ||
| 1233 | static_cast<u32>(new_params.target)); | ||
| 1234 | UNREACHABLE(); | ||
| 1235 | } | ||
| 1236 | |||
| 1237 | return new_surface; | ||
| 1238 | } | ||
| 1239 | |||
| 1240 | Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const { | ||
| 1241 | return TryGet(host_ptr); | ||
| 1242 | } | ||
| 1243 | |||
| 1244 | void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { | ||
| 1245 | const auto& surface_reserve_key{SurfaceReserveKey::Create(surface->GetSurfaceParams())}; | ||
| 1246 | surface_reserve[surface_reserve_key] = surface; | ||
| 1247 | } | ||
| 1248 | |||
| 1249 | Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params) { | ||
| 1250 | const auto& surface_reserve_key{SurfaceReserveKey::Create(params)}; | ||
| 1251 | auto search{surface_reserve.find(surface_reserve_key)}; | ||
| 1252 | if (search != surface_reserve.end()) { | ||
| 1253 | return search->second; | ||
| 1254 | } | ||
| 1255 | return {}; | ||
| 1256 | } | ||
| 1257 | |||
| 1258 | static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params, | ||
| 1259 | u32 height) { | ||
| 1260 | for (u32 i = 0; i < params.max_mip_level; i++) { | ||
| 1261 | if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) { | ||
| 1262 | return {i}; | ||
| 1263 | } | ||
| 1264 | } | ||
| 1265 | return {}; | ||
| 1266 | } | ||
| 1267 | |||
| 1268 | static std::optional<u32> TryFindBestLayer(GPUVAddr addr, const SurfaceParams params, u32 mipmap) { | ||
| 1269 | const std::size_t size{params.LayerMemorySize()}; | ||
| 1270 | GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)}; | ||
| 1271 | for (u32 i = 0; i < params.depth; i++) { | ||
| 1272 | if (start == addr) { | ||
| 1273 | return {i}; | ||
| 1274 | } | ||
| 1275 | start += size; | ||
| 1276 | } | ||
| 1277 | return {}; | ||
| 1278 | } | ||
| 1279 | |||
| 1280 | static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface, | ||
| 1281 | const Surface blitted_surface) { | ||
| 1282 | const auto& dst_params = blitted_surface->GetSurfaceParams(); | ||
| 1283 | const auto& src_params = render_surface->GetSurfaceParams(); | ||
| 1284 | const std::size_t src_memory_size = src_params.size_in_bytes; | ||
| 1285 | const std::optional<u32> level = | ||
| 1286 | TryFindBestMipMap(src_memory_size, dst_params, src_params.height); | ||
| 1287 | if (level.has_value()) { | ||
| 1288 | if (src_params.width == dst_params.MipWidthGobAligned(*level) && | ||
| 1289 | src_params.height == dst_params.MipHeight(*level) && | ||
| 1290 | src_params.block_height >= dst_params.MipBlockHeight(*level)) { | ||
| 1291 | const std::optional<u32> slot = | ||
| 1292 | TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level); | ||
| 1293 | if (slot.has_value()) { | ||
| 1294 | glCopyImageSubData(render_surface->Texture().handle, | ||
| 1295 | SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, | ||
| 1296 | blitted_surface->Texture().handle, | ||
| 1297 | SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot, | ||
| 1298 | dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1); | ||
| 1299 | blitted_surface->MarkAsModified(true, cache); | ||
| 1300 | return true; | ||
| 1301 | } | ||
| 1302 | } | ||
| 1303 | } | ||
| 1304 | return false; | ||
| 1305 | } | ||
| 1306 | |||
| 1307 | static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) { | ||
| 1308 | const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize(); | ||
| 1309 | const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize(); | ||
| 1310 | if (bound2 > bound1) | ||
| 1311 | return true; | ||
| 1312 | const auto& dst_params = blitted_surface->GetSurfaceParams(); | ||
| 1313 | const auto& src_params = render_surface->GetSurfaceParams(); | ||
| 1314 | return (dst_params.component_type != src_params.component_type); | ||
| 1315 | } | ||
| 1316 | |||
| 1317 | static bool IsReinterpretInvalidSecond(const Surface render_surface, | ||
| 1318 | const Surface blitted_surface) { | ||
| 1319 | const auto& dst_params = blitted_surface->GetSurfaceParams(); | ||
| 1320 | const auto& src_params = render_surface->GetSurfaceParams(); | ||
| 1321 | return (dst_params.height > src_params.height && dst_params.width > src_params.width); | ||
| 1322 | } | ||
| 1323 | |||
| 1324 | bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface, | ||
| 1325 | Surface intersect) { | ||
| 1326 | if (IsReinterpretInvalid(triggering_surface, intersect)) { | ||
| 1327 | Unregister(intersect); | ||
| 1328 | return false; | ||
| 1329 | } | ||
| 1330 | if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) { | ||
| 1331 | if (IsReinterpretInvalidSecond(triggering_surface, intersect)) { | ||
| 1332 | Unregister(intersect); | ||
| 1333 | return false; | ||
| 1334 | } | ||
| 1335 | FlushObject(intersect); | ||
| 1336 | FlushObject(triggering_surface); | ||
| 1337 | intersect->MarkForReload(true); | ||
| 1338 | } | ||
| 1339 | return true; | ||
| 1340 | } | ||
| 1341 | |||
| 1342 | void RasterizerCacheOpenGL::SignalPreDrawCall() { | ||
| 1343 | if (texception && GLAD_GL_ARB_texture_barrier) { | ||
| 1344 | glTextureBarrier(); | ||
| 1345 | } | ||
| 1346 | texception = false; | ||
| 1347 | } | ||
| 1348 | |||
| 1349 | void RasterizerCacheOpenGL::SignalPostDrawCall() { | ||
| 1350 | for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) { | ||
| 1351 | if (current_color_buffers[i] != nullptr) { | ||
| 1352 | Surface intersect = | ||
| 1353 | CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr()); | ||
| 1354 | if (intersect != nullptr) { | ||
| 1355 | PartialReinterpretSurface(current_color_buffers[i], intersect); | ||
| 1356 | texception = true; | ||
| 1357 | } | ||
| 1358 | } | ||
| 1359 | } | ||
| 1360 | } | ||
| 1361 | |||
| 1362 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h deleted file mode 100644 index 6263ef3e7..000000000 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ /dev/null | |||
| @@ -1,572 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <memory> | ||
| 9 | #include <string> | ||
| 10 | #include <tuple> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "common/alignment.h" | ||
| 14 | #include "common/bit_util.h" | ||
| 15 | #include "common/common_types.h" | ||
| 16 | #include "common/hash.h" | ||
| 17 | #include "common/math_util.h" | ||
| 18 | #include "video_core/engines/fermi_2d.h" | ||
| 19 | #include "video_core/engines/maxwell_3d.h" | ||
| 20 | #include "video_core/rasterizer_cache.h" | ||
| 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||
| 23 | #include "video_core/surface.h" | ||
| 24 | #include "video_core/textures/decoders.h" | ||
| 25 | #include "video_core/textures/texture.h" | ||
| 26 | |||
| 27 | namespace OpenGL { | ||
| 28 | |||
| 29 | class CachedSurface; | ||
| 30 | using Surface = std::shared_ptr<CachedSurface>; | ||
| 31 | using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>; | ||
| 32 | |||
| 33 | using SurfaceTarget = VideoCore::Surface::SurfaceTarget; | ||
| 34 | using SurfaceType = VideoCore::Surface::SurfaceType; | ||
| 35 | using PixelFormat = VideoCore::Surface::PixelFormat; | ||
| 36 | using ComponentType = VideoCore::Surface::ComponentType; | ||
| 37 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 38 | |||
| 39 | struct SurfaceParams { | ||
| 40 | enum class SurfaceClass { | ||
| 41 | Uploaded, | ||
| 42 | RenderTarget, | ||
| 43 | DepthBuffer, | ||
| 44 | Copy, | ||
| 45 | }; | ||
| 46 | |||
| 47 | static std::string SurfaceTargetName(SurfaceTarget target) { | ||
| 48 | switch (target) { | ||
| 49 | case SurfaceTarget::Texture1D: | ||
| 50 | return "Texture1D"; | ||
| 51 | case SurfaceTarget::Texture2D: | ||
| 52 | return "Texture2D"; | ||
| 53 | case SurfaceTarget::Texture3D: | ||
| 54 | return "Texture3D"; | ||
| 55 | case SurfaceTarget::Texture1DArray: | ||
| 56 | return "Texture1DArray"; | ||
| 57 | case SurfaceTarget::Texture2DArray: | ||
| 58 | return "Texture2DArray"; | ||
| 59 | case SurfaceTarget::TextureCubemap: | ||
| 60 | return "TextureCubemap"; | ||
| 61 | case SurfaceTarget::TextureCubeArray: | ||
| 62 | return "TextureCubeArray"; | ||
| 63 | default: | ||
| 64 | LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target)); | ||
| 65 | UNREACHABLE(); | ||
| 66 | return fmt::format("TextureUnknown({})", static_cast<u32>(target)); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | |||
| 70 | u32 GetFormatBpp() const { | ||
| 71 | return VideoCore::Surface::GetFormatBpp(pixel_format); | ||
| 72 | } | ||
| 73 | |||
| 74 | /// Returns the rectangle corresponding to this surface | ||
| 75 | Common::Rectangle<u32> GetRect(u32 mip_level = 0) const; | ||
| 76 | |||
| 77 | /// Returns the total size of this surface in bytes, adjusted for compression | ||
| 78 | std::size_t SizeInBytesRaw(bool ignore_tiled = false) const { | ||
| 79 | const u32 compression_factor{GetCompressionFactor(pixel_format)}; | ||
| 80 | const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)}; | ||
| 81 | const size_t uncompressed_size{ | ||
| 82 | Tegra::Texture::CalculateSize((ignore_tiled ? false : is_tiled), bytes_per_pixel, width, | ||
| 83 | height, depth, block_height, block_depth)}; | ||
| 84 | |||
| 85 | // Divide by compression_factor^2, as height and width are factored by this | ||
| 86 | return uncompressed_size / (compression_factor * compression_factor); | ||
| 87 | } | ||
| 88 | |||
| 89 | /// Returns the size of this surface as an OpenGL texture in bytes | ||
| 90 | std::size_t SizeInBytesGL() const { | ||
| 91 | return SizeInBytesRaw(true); | ||
| 92 | } | ||
| 93 | |||
| 94 | /// Returns the size of this surface as a cube face in bytes | ||
| 95 | std::size_t SizeInBytesCubeFace() const { | ||
| 96 | return size_in_bytes / 6; | ||
| 97 | } | ||
| 98 | |||
| 99 | /// Returns the size of this surface as an OpenGL cube face in bytes | ||
| 100 | std::size_t SizeInBytesCubeFaceGL() const { | ||
| 101 | return size_in_bytes_gl / 6; | ||
| 102 | } | ||
| 103 | |||
| 104 | /// Returns the exact size of memory occupied by the texture in VRAM, including mipmaps. | ||
| 105 | std::size_t MemorySize() const { | ||
| 106 | std::size_t size = InnerMemorySize(false, is_layered); | ||
| 107 | if (is_layered) | ||
| 108 | return size * depth; | ||
| 109 | return size; | ||
| 110 | } | ||
| 111 | |||
| 112 | /// Returns true if the parameters constitute a valid rasterizer surface. | ||
| 113 | bool IsValid() const { | ||
| 114 | return gpu_addr && host_ptr && height && width; | ||
| 115 | } | ||
| 116 | |||
| 117 | /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including | ||
| 118 | /// mipmaps. | ||
| 119 | std::size_t LayerMemorySize() const { | ||
| 120 | return InnerMemorySize(false, true); | ||
| 121 | } | ||
| 122 | |||
| 123 | /// Returns the size of a layer of this surface in OpenGL. | ||
| 124 | std::size_t LayerSizeGL(u32 mip_level) const { | ||
| 125 | return InnerMipmapMemorySize(mip_level, true, is_layered, false); | ||
| 126 | } | ||
| 127 | |||
| 128 | std::size_t GetMipmapSizeGL(u32 mip_level, bool ignore_compressed = true) const { | ||
| 129 | std::size_t size = InnerMipmapMemorySize(mip_level, true, is_layered, ignore_compressed); | ||
| 130 | if (is_layered) | ||
| 131 | return size * depth; | ||
| 132 | return size; | ||
| 133 | } | ||
| 134 | |||
| 135 | std::size_t GetMipmapLevelOffset(u32 mip_level) const { | ||
| 136 | std::size_t offset = 0; | ||
| 137 | for (u32 i = 0; i < mip_level; i++) | ||
| 138 | offset += InnerMipmapMemorySize(i, false, is_layered); | ||
| 139 | return offset; | ||
| 140 | } | ||
| 141 | |||
| 142 | std::size_t GetMipmapLevelOffsetGL(u32 mip_level) const { | ||
| 143 | std::size_t offset = 0; | ||
| 144 | for (u32 i = 0; i < mip_level; i++) | ||
| 145 | offset += InnerMipmapMemorySize(i, true, is_layered); | ||
| 146 | return offset; | ||
| 147 | } | ||
| 148 | |||
| 149 | std::size_t GetMipmapSingleSize(u32 mip_level) const { | ||
| 150 | return InnerMipmapMemorySize(mip_level, false, is_layered); | ||
| 151 | } | ||
| 152 | |||
| 153 | u32 MipWidth(u32 mip_level) const { | ||
| 154 | return std::max(1U, width >> mip_level); | ||
| 155 | } | ||
| 156 | |||
| 157 | u32 MipWidthGobAligned(u32 mip_level) const { | ||
| 158 | return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp()); | ||
| 159 | } | ||
| 160 | |||
| 161 | u32 MipHeight(u32 mip_level) const { | ||
| 162 | return std::max(1U, height >> mip_level); | ||
| 163 | } | ||
| 164 | |||
| 165 | u32 MipDepth(u32 mip_level) const { | ||
| 166 | return is_layered ? depth : std::max(1U, depth >> mip_level); | ||
| 167 | } | ||
| 168 | |||
| 169 | // Auto block resizing algorithm from: | ||
| 170 | // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c | ||
| 171 | u32 MipBlockHeight(u32 mip_level) const { | ||
| 172 | if (mip_level == 0) | ||
| 173 | return block_height; | ||
| 174 | u32 alt_height = MipHeight(mip_level); | ||
| 175 | u32 h = GetDefaultBlockHeight(pixel_format); | ||
| 176 | u32 blocks_in_y = (alt_height + h - 1) / h; | ||
| 177 | u32 bh = 16; | ||
| 178 | while (bh > 1 && blocks_in_y <= bh * 4) { | ||
| 179 | bh >>= 1; | ||
| 180 | } | ||
| 181 | return bh; | ||
| 182 | } | ||
| 183 | |||
| 184 | u32 MipBlockDepth(u32 mip_level) const { | ||
| 185 | if (mip_level == 0) { | ||
| 186 | return block_depth; | ||
| 187 | } | ||
| 188 | |||
| 189 | if (is_layered) { | ||
| 190 | return 1; | ||
| 191 | } | ||
| 192 | |||
| 193 | const u32 mip_depth = MipDepth(mip_level); | ||
| 194 | u32 bd = 32; | ||
| 195 | while (bd > 1 && mip_depth * 2 <= bd) { | ||
| 196 | bd >>= 1; | ||
| 197 | } | ||
| 198 | |||
| 199 | if (bd == 32) { | ||
| 200 | const u32 bh = MipBlockHeight(mip_level); | ||
| 201 | if (bh >= 4) { | ||
| 202 | return 16; | ||
| 203 | } | ||
| 204 | } | ||
| 205 | |||
| 206 | return bd; | ||
| 207 | } | ||
| 208 | |||
| 209 | u32 RowAlign(u32 mip_level) const { | ||
| 210 | const u32 m_width = MipWidth(mip_level); | ||
| 211 | const u32 bytes_per_pixel = GetBytesPerPixel(pixel_format); | ||
| 212 | const u32 l2 = Common::CountTrailingZeroes32(m_width * bytes_per_pixel); | ||
| 213 | return (1U << l2); | ||
| 214 | } | ||
| 215 | |||
| 216 | /// Creates SurfaceParams from a texture configuration | ||
| 217 | static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config, | ||
| 218 | const GLShader::SamplerEntry& entry); | ||
| 219 | |||
| 220 | /// Creates SurfaceParams from a framebuffer configuration | ||
| 221 | static SurfaceParams CreateForFramebuffer(std::size_t index); | ||
| 222 | |||
| 223 | /// Creates SurfaceParams for a depth buffer configuration | ||
| 224 | static SurfaceParams CreateForDepthBuffer( | ||
| 225 | u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format, | ||
| 226 | u32 block_width, u32 block_height, u32 block_depth, | ||
| 227 | Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); | ||
| 228 | |||
| 229 | /// Creates SurfaceParams for a Fermi2D surface copy | ||
| 230 | static SurfaceParams CreateForFermiCopySurface( | ||
| 231 | const Tegra::Engines::Fermi2D::Regs::Surface& config); | ||
| 232 | |||
| 233 | /// Checks if surfaces are compatible for caching | ||
| 234 | bool IsCompatibleSurface(const SurfaceParams& other) const { | ||
| 235 | if (std::tie(pixel_format, type, width, height, target, depth, is_tiled) == | ||
| 236 | std::tie(other.pixel_format, other.type, other.width, other.height, other.target, | ||
| 237 | other.depth, other.is_tiled)) { | ||
| 238 | if (!is_tiled) | ||
| 239 | return true; | ||
| 240 | return std::tie(block_height, block_depth, tile_width_spacing) == | ||
| 241 | std::tie(other.block_height, other.block_depth, other.tile_width_spacing); | ||
| 242 | } | ||
| 243 | return false; | ||
| 244 | } | ||
| 245 | |||
| 246 | /// Initializes parameters for caching, should be called after everything has been initialized | ||
| 247 | void InitCacheParameters(GPUVAddr gpu_addr); | ||
| 248 | |||
| 249 | std::string TargetName() const { | ||
| 250 | switch (target) { | ||
| 251 | case SurfaceTarget::Texture1D: | ||
| 252 | return "1D"; | ||
| 253 | case SurfaceTarget::Texture2D: | ||
| 254 | return "2D"; | ||
| 255 | case SurfaceTarget::Texture3D: | ||
| 256 | return "3D"; | ||
| 257 | case SurfaceTarget::Texture1DArray: | ||
| 258 | return "1DArray"; | ||
| 259 | case SurfaceTarget::Texture2DArray: | ||
| 260 | return "2DArray"; | ||
| 261 | case SurfaceTarget::TextureCubemap: | ||
| 262 | return "Cube"; | ||
| 263 | default: | ||
| 264 | LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target)); | ||
| 265 | UNREACHABLE(); | ||
| 266 | return fmt::format("TUK({})", static_cast<u32>(target)); | ||
| 267 | } | ||
| 268 | } | ||
| 269 | |||
| 270 | std::string ClassName() const { | ||
| 271 | switch (identity) { | ||
| 272 | case SurfaceClass::Uploaded: | ||
| 273 | return "UP"; | ||
| 274 | case SurfaceClass::RenderTarget: | ||
| 275 | return "RT"; | ||
| 276 | case SurfaceClass::DepthBuffer: | ||
| 277 | return "DB"; | ||
| 278 | case SurfaceClass::Copy: | ||
| 279 | return "CP"; | ||
| 280 | default: | ||
| 281 | LOG_CRITICAL(HW_GPU, "Unimplemented surface_class={}", static_cast<u32>(identity)); | ||
| 282 | UNREACHABLE(); | ||
| 283 | return fmt::format("CUK({})", static_cast<u32>(identity)); | ||
| 284 | } | ||
| 285 | } | ||
| 286 | |||
| 287 | std::string IdentityString() const { | ||
| 288 | return ClassName() + '_' + TargetName() + '_' + (is_tiled ? 'T' : 'L'); | ||
| 289 | } | ||
| 290 | |||
| 291 | bool is_tiled; | ||
| 292 | u32 block_width; | ||
| 293 | u32 block_height; | ||
| 294 | u32 block_depth; | ||
| 295 | u32 tile_width_spacing; | ||
| 296 | PixelFormat pixel_format; | ||
| 297 | ComponentType component_type; | ||
| 298 | SurfaceType type; | ||
| 299 | u32 width; | ||
| 300 | u32 height; | ||
| 301 | u32 depth; | ||
| 302 | u32 unaligned_height; | ||
| 303 | u32 pitch; | ||
| 304 | SurfaceTarget target; | ||
| 305 | SurfaceClass identity; | ||
| 306 | u32 max_mip_level; | ||
| 307 | bool is_layered; | ||
| 308 | bool is_array; | ||
| 309 | bool srgb_conversion; | ||
| 310 | // Parameters used for caching | ||
| 311 | u8* host_ptr; | ||
| 312 | GPUVAddr gpu_addr; | ||
| 313 | std::size_t size_in_bytes; | ||
| 314 | std::size_t size_in_bytes_gl; | ||
| 315 | |||
| 316 | // Render target specific parameters, not used in caching | ||
| 317 | struct { | ||
| 318 | u32 index; | ||
| 319 | u32 array_mode; | ||
| 320 | u32 volume; | ||
| 321 | u32 layer_stride; | ||
| 322 | u32 base_layer; | ||
| 323 | } rt; | ||
| 324 | |||
| 325 | private: | ||
| 326 | std::size_t InnerMipmapMemorySize(u32 mip_level, bool force_gl = false, bool layer_only = false, | ||
| 327 | bool uncompressed = false) const; | ||
| 328 | std::size_t InnerMemorySize(bool force_gl = false, bool layer_only = false, | ||
| 329 | bool uncompressed = false) const; | ||
| 330 | }; | ||
| 331 | |||
| 332 | }; // namespace OpenGL | ||
| 333 | |||
| 334 | /// Hashable variation of SurfaceParams, used for a key in the surface cache | ||
| 335 | struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> { | ||
| 336 | static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) { | ||
| 337 | SurfaceReserveKey res; | ||
| 338 | res.state = params; | ||
| 339 | res.state.identity = {}; // Ignore the origin of the texture | ||
| 340 | res.state.gpu_addr = {}; // Ignore GPU vaddr in caching | ||
| 341 | res.state.rt = {}; // Ignore rt config in caching | ||
| 342 | return res; | ||
| 343 | } | ||
| 344 | }; | ||
| 345 | namespace std { | ||
| 346 | template <> | ||
| 347 | struct hash<SurfaceReserveKey> { | ||
| 348 | std::size_t operator()(const SurfaceReserveKey& k) const { | ||
| 349 | return k.Hash(); | ||
| 350 | } | ||
| 351 | }; | ||
| 352 | } // namespace std | ||
| 353 | |||
| 354 | namespace OpenGL { | ||
| 355 | |||
| 356 | class RasterizerOpenGL; | ||
| 357 | |||
| 358 | // This is used to store temporary big buffers, | ||
| 359 | // instead of creating/destroying all the time | ||
| 360 | struct RasterizerTemporaryMemory { | ||
| 361 | std::vector<std::vector<u8>> gl_buffer; | ||
| 362 | }; | ||
| 363 | |||
| 364 | class CachedSurface final : public RasterizerCacheObject { | ||
| 365 | public: | ||
| 366 | explicit CachedSurface(const SurfaceParams& params); | ||
| 367 | |||
| 368 | VAddr GetCpuAddr() const override { | ||
| 369 | return cpu_addr; | ||
| 370 | } | ||
| 371 | |||
| 372 | std::size_t GetSizeInBytes() const override { | ||
| 373 | return cached_size_in_bytes; | ||
| 374 | } | ||
| 375 | |||
| 376 | std::size_t GetMemorySize() const { | ||
| 377 | return memory_size; | ||
| 378 | } | ||
| 379 | |||
| 380 | const OGLTexture& Texture() const { | ||
| 381 | return texture; | ||
| 382 | } | ||
| 383 | |||
| 384 | const OGLTexture& Texture(bool as_array) { | ||
| 385 | if (params.is_array == as_array) { | ||
| 386 | return texture; | ||
| 387 | } else { | ||
| 388 | EnsureTextureDiscrepantView(); | ||
| 389 | return discrepant_view; | ||
| 390 | } | ||
| 391 | } | ||
| 392 | |||
| 393 | GLenum Target() const { | ||
| 394 | return gl_target; | ||
| 395 | } | ||
| 396 | |||
| 397 | const SurfaceParams& GetSurfaceParams() const { | ||
| 398 | return params; | ||
| 399 | } | ||
| 400 | |||
| 401 | // Read/Write data in Switch memory to/from gl_buffer | ||
| 402 | void LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem); | ||
| 403 | void FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem); | ||
| 404 | |||
| 405 | // Upload data in gl_buffer to this surface's texture | ||
| 406 | void UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, GLuint read_fb_handle, | ||
| 407 | GLuint draw_fb_handle); | ||
| 408 | |||
| 409 | void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x, | ||
| 410 | Tegra::Texture::SwizzleSource swizzle_y, | ||
| 411 | Tegra::Texture::SwizzleSource swizzle_z, | ||
| 412 | Tegra::Texture::SwizzleSource swizzle_w); | ||
| 413 | |||
| 414 | void MarkReinterpreted() { | ||
| 415 | reinterpreted = true; | ||
| 416 | } | ||
| 417 | |||
| 418 | bool IsReinterpreted() const { | ||
| 419 | return reinterpreted; | ||
| 420 | } | ||
| 421 | |||
| 422 | void MarkForReload(bool reload) { | ||
| 423 | must_reload = reload; | ||
| 424 | } | ||
| 425 | |||
| 426 | bool MustReload() const { | ||
| 427 | return must_reload; | ||
| 428 | } | ||
| 429 | |||
| 430 | bool IsUploaded() const { | ||
| 431 | return params.identity == SurfaceParams::SurfaceClass::Uploaded; | ||
| 432 | } | ||
| 433 | |||
| 434 | private: | ||
| 435 | void UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map, | ||
| 436 | GLuint read_fb_handle, GLuint draw_fb_handle); | ||
| 437 | |||
| 438 | void EnsureTextureDiscrepantView(); | ||
| 439 | |||
| 440 | OGLTexture texture; | ||
| 441 | OGLTexture discrepant_view; | ||
| 442 | SurfaceParams params{}; | ||
| 443 | GLenum gl_target{}; | ||
| 444 | GLenum gl_internal_format{}; | ||
| 445 | std::size_t cached_size_in_bytes{}; | ||
| 446 | std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA}; | ||
| 447 | std::size_t memory_size; | ||
| 448 | bool reinterpreted = false; | ||
| 449 | bool must_reload = false; | ||
| 450 | VAddr cpu_addr{}; | ||
| 451 | }; | ||
| 452 | |||
| 453 | class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { | ||
| 454 | public: | ||
| 455 | explicit RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer); | ||
| 456 | |||
| 457 | /// Get a surface based on the texture configuration | ||
| 458 | Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, | ||
| 459 | const GLShader::SamplerEntry& entry); | ||
| 460 | |||
| 461 | /// Get the depth surface based on the framebuffer configuration | ||
| 462 | Surface GetDepthBufferSurface(bool preserve_contents); | ||
| 463 | |||
| 464 | /// Get the color surface based on the framebuffer configuration and the specified render target | ||
| 465 | Surface GetColorBufferSurface(std::size_t index, bool preserve_contents); | ||
| 466 | |||
| 467 | /// Tries to find a framebuffer using on the provided CPU address | ||
| 468 | Surface TryFindFramebufferSurface(const u8* host_ptr) const; | ||
| 469 | |||
| 470 | /// Copies the contents of one surface to another | ||
| 471 | void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, | ||
| 472 | const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, | ||
| 473 | const Common::Rectangle<u32>& src_rect, | ||
| 474 | const Common::Rectangle<u32>& dst_rect); | ||
| 475 | |||
| 476 | void SignalPreDrawCall(); | ||
| 477 | void SignalPostDrawCall(); | ||
| 478 | |||
| 479 | protected: | ||
| 480 | void FlushObjectInner(const Surface& object) override { | ||
| 481 | object->FlushGLBuffer(temporal_memory); | ||
| 482 | } | ||
| 483 | |||
| 484 | private: | ||
| 485 | void LoadSurface(const Surface& surface); | ||
| 486 | Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); | ||
| 487 | |||
| 488 | /// Gets an uncached surface, creating it if need be | ||
| 489 | Surface GetUncachedSurface(const SurfaceParams& params); | ||
| 490 | |||
| 491 | /// Recreates a surface with new parameters | ||
| 492 | Surface RecreateSurface(const Surface& old_surface, const SurfaceParams& new_params); | ||
| 493 | |||
| 494 | /// Reserves a unique surface that can be reused later | ||
| 495 | void ReserveSurface(const Surface& surface); | ||
| 496 | |||
| 497 | /// Tries to get a reserved surface for the specified parameters | ||
| 498 | Surface TryGetReservedSurface(const SurfaceParams& params); | ||
| 499 | |||
| 500 | // Partialy reinterpret a surface based on a triggering_surface that collides with it. | ||
| 501 | // returns true if the reinterpret was successful, false in case it was not. | ||
| 502 | bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect); | ||
| 503 | |||
| 504 | /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data | ||
| 505 | void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface); | ||
| 506 | void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface); | ||
| 507 | void FastCopySurface(const Surface& src_surface, const Surface& dst_surface); | ||
| 508 | void CopySurface(const Surface& src_surface, const Surface& dst_surface, | ||
| 509 | const GLuint copy_pbo_handle, const GLenum src_attachment = 0, | ||
| 510 | const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0); | ||
| 511 | |||
| 512 | /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have | ||
| 513 | /// previously been used. This is to prevent surfaces from being constantly created and | ||
| 514 | /// destroyed when used with different surface parameters. | ||
| 515 | std::unordered_map<SurfaceReserveKey, Surface> surface_reserve; | ||
| 516 | |||
| 517 | OGLFramebuffer read_framebuffer; | ||
| 518 | OGLFramebuffer draw_framebuffer; | ||
| 519 | |||
| 520 | bool texception = false; | ||
| 521 | |||
| 522 | /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one | ||
| 523 | /// using the new format. | ||
| 524 | OGLBuffer copy_pbo; | ||
| 525 | |||
| 526 | std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers; | ||
| 527 | std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers; | ||
| 528 | Surface last_depth_buffer; | ||
| 529 | |||
| 530 | RasterizerTemporaryMemory temporal_memory; | ||
| 531 | |||
| 532 | using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>; | ||
| 533 | using SurfaceInterval = typename SurfaceIntervalCache::interval_type; | ||
| 534 | |||
| 535 | static auto GetReinterpretInterval(const Surface& object) { | ||
| 536 | return SurfaceInterval::right_open(object->GetCacheAddr() + 1, | ||
| 537 | object->GetCacheAddr() + object->GetMemorySize() - 1); | ||
| 538 | } | ||
| 539 | |||
| 540 | // Reinterpreted surfaces are very fragil as the game may keep rendering into them. | ||
| 541 | SurfaceIntervalCache reinterpreted_surfaces; | ||
| 542 | |||
| 543 | void RegisterReinterpretSurface(Surface reinterpret_surface) { | ||
| 544 | auto interval = GetReinterpretInterval(reinterpret_surface); | ||
| 545 | reinterpreted_surfaces.insert({interval, reinterpret_surface}); | ||
| 546 | reinterpret_surface->MarkReinterpreted(); | ||
| 547 | } | ||
| 548 | |||
| 549 | Surface CollideOnReinterpretedSurface(CacheAddr addr) const { | ||
| 550 | const SurfaceInterval interval{addr}; | ||
| 551 | for (auto& pair : | ||
| 552 | boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) { | ||
| 553 | return pair.second; | ||
| 554 | } | ||
| 555 | return nullptr; | ||
| 556 | } | ||
| 557 | |||
| 558 | void Register(const Surface& object) override { | ||
| 559 | RasterizerCache<Surface>::Register(object); | ||
| 560 | } | ||
| 561 | |||
| 562 | /// Unregisters an object from the cache | ||
| 563 | void Unregister(const Surface& object) override { | ||
| 564 | if (object->IsReinterpreted()) { | ||
| 565 | auto interval = GetReinterpretInterval(object); | ||
| 566 | reinterpreted_surfaces.erase(interval); | ||
| 567 | } | ||
| 568 | RasterizerCache<Surface>::Unregister(object); | ||
| 569 | } | ||
| 570 | }; | ||
| 571 | |||
| 572 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index bfe666a73..5c96c1d46 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp | |||
| @@ -33,6 +33,24 @@ void OGLTexture::Release() { | |||
| 33 | handle = 0; | 33 | handle = 0; |
| 34 | } | 34 | } |
| 35 | 35 | ||
| 36 | void OGLTextureView::Create() { | ||
| 37 | if (handle != 0) | ||
| 38 | return; | ||
| 39 | |||
| 40 | MICROPROFILE_SCOPE(OpenGL_ResourceCreation); | ||
| 41 | glGenTextures(1, &handle); | ||
| 42 | } | ||
| 43 | |||
| 44 | void OGLTextureView::Release() { | ||
| 45 | if (handle == 0) | ||
| 46 | return; | ||
| 47 | |||
| 48 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); | ||
| 49 | glDeleteTextures(1, &handle); | ||
| 50 | OpenGLState::GetCurState().UnbindTexture(handle).Apply(); | ||
| 51 | handle = 0; | ||
| 52 | } | ||
| 53 | |||
| 36 | void OGLSampler::Create() { | 54 | void OGLSampler::Create() { |
| 37 | if (handle != 0) | 55 | if (handle != 0) |
| 38 | return; | 56 | return; |
| @@ -130,6 +148,12 @@ void OGLBuffer::Release() { | |||
| 130 | handle = 0; | 148 | handle = 0; |
| 131 | } | 149 | } |
| 132 | 150 | ||
| 151 | void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) { | ||
| 152 | ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; }); | ||
| 153 | |||
| 154 | glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY); | ||
| 155 | } | ||
| 156 | |||
| 133 | void OGLSync::Create() { | 157 | void OGLSync::Create() { |
| 134 | if (handle != 0) | 158 | if (handle != 0) |
| 135 | return; | 159 | return; |
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index fbb93ee49..3a85a1d4c 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h | |||
| @@ -36,6 +36,31 @@ public: | |||
| 36 | GLuint handle = 0; | 36 | GLuint handle = 0; |
| 37 | }; | 37 | }; |
| 38 | 38 | ||
| 39 | class OGLTextureView : private NonCopyable { | ||
| 40 | public: | ||
| 41 | OGLTextureView() = default; | ||
| 42 | |||
| 43 | OGLTextureView(OGLTextureView&& o) noexcept : handle(std::exchange(o.handle, 0)) {} | ||
| 44 | |||
| 45 | ~OGLTextureView() { | ||
| 46 | Release(); | ||
| 47 | } | ||
| 48 | |||
| 49 | OGLTextureView& operator=(OGLTextureView&& o) noexcept { | ||
| 50 | Release(); | ||
| 51 | handle = std::exchange(o.handle, 0); | ||
| 52 | return *this; | ||
| 53 | } | ||
| 54 | |||
| 55 | /// Creates a new internal OpenGL resource and stores the handle | ||
| 56 | void Create(); | ||
| 57 | |||
| 58 | /// Deletes the internal OpenGL resource | ||
| 59 | void Release(); | ||
| 60 | |||
| 61 | GLuint handle = 0; | ||
| 62 | }; | ||
| 63 | |||
| 39 | class OGLSampler : private NonCopyable { | 64 | class OGLSampler : private NonCopyable { |
| 40 | public: | 65 | public: |
| 41 | OGLSampler() = default; | 66 | OGLSampler() = default; |
| @@ -161,6 +186,9 @@ public: | |||
| 161 | /// Deletes the internal OpenGL resource | 186 | /// Deletes the internal OpenGL resource |
| 162 | void Release(); | 187 | void Release(); |
| 163 | 188 | ||
| 189 | // Converts the buffer into a stream copy buffer with a fixed size | ||
| 190 | void MakeStreamCopy(std::size_t buffer_size); | ||
| 191 | |||
| 164 | GLuint handle = 0; | 192 | GLuint handle = 0; |
| 165 | }; | 193 | }; |
| 166 | 194 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 8d3d7bfdc..f9b2b03a0 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -103,15 +103,22 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen | |||
| 103 | /// Calculates the size of a program stream | 103 | /// Calculates the size of a program stream |
| 104 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | 104 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { |
| 105 | constexpr std::size_t start_offset = 10; | 105 | constexpr std::size_t start_offset = 10; |
| 106 | // This is the encoded version of BRA that jumps to itself. All Nvidia | ||
| 107 | // shaders end with one. | ||
| 108 | constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; | ||
| 109 | constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; | ||
| 106 | std::size_t offset = start_offset; | 110 | std::size_t offset = start_offset; |
| 107 | std::size_t size = start_offset * sizeof(u64); | 111 | std::size_t size = start_offset * sizeof(u64); |
| 108 | while (offset < program.size()) { | 112 | while (offset < program.size()) { |
| 109 | const u64 instruction = program[offset]; | 113 | const u64 instruction = program[offset]; |
| 110 | if (!IsSchedInstruction(offset, start_offset)) { | 114 | if (!IsSchedInstruction(offset, start_offset)) { |
| 111 | if (instruction == 0 || (instruction >> 52) == 0x50b) { | 115 | if ((instruction & mask) == self_jumping_branch) { |
| 112 | // End on Maxwell's "nop" instruction | 116 | // End on Maxwell's "nop" instruction |
| 113 | break; | 117 | break; |
| 114 | } | 118 | } |
| 119 | if (instruction == 0) { | ||
| 120 | break; | ||
| 121 | } | ||
| 115 | } | 122 | } |
| 116 | size += sizeof(u64); | 123 | size += sizeof(u64); |
| 117 | offset++; | 124 | offset++; |
| @@ -168,8 +175,12 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr | |||
| 168 | } | 175 | } |
| 169 | 176 | ||
| 170 | CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, | 177 | CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, |
| 171 | Maxwell::ShaderProgram program_type, BaseBindings base_bindings, | 178 | Maxwell::ShaderProgram program_type, const ProgramVariant& variant, |
| 172 | GLenum primitive_mode, bool hint_retrievable = false) { | 179 | bool hint_retrievable = false) { |
| 180 | auto base_bindings{variant.base_bindings}; | ||
| 181 | const auto primitive_mode{variant.primitive_mode}; | ||
| 182 | const auto texture_buffer_usage{variant.texture_buffer_usage}; | ||
| 183 | |||
| 173 | std::string source = "#version 430 core\n" | 184 | std::string source = "#version 430 core\n" |
| 174 | "#extension GL_ARB_separate_shader_objects : enable\n\n"; | 185 | "#extension GL_ARB_separate_shader_objects : enable\n\n"; |
| 175 | source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | 186 | source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); |
| @@ -186,6 +197,18 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 186 | source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(), | 197 | source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(), |
| 187 | base_bindings.sampler++); | 198 | base_bindings.sampler++); |
| 188 | } | 199 | } |
| 200 | for (const auto& image : entries.images) { | ||
| 201 | source += | ||
| 202 | fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++); | ||
| 203 | } | ||
| 204 | |||
| 205 | // Transform 1D textures to texture samplers by declaring its preprocessor macros. | ||
| 206 | for (std::size_t i = 0; i < texture_buffer_usage.size(); ++i) { | ||
| 207 | if (!texture_buffer_usage.test(i)) { | ||
| 208 | continue; | ||
| 209 | } | ||
| 210 | source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i); | ||
| 211 | } | ||
| 189 | 212 | ||
| 190 | if (program_type == Maxwell::ShaderProgram::Geometry) { | 213 | if (program_type == Maxwell::ShaderProgram::Geometry) { |
| 191 | const auto [glsl_topology, debug_name, max_vertices] = | 214 | const auto [glsl_topology, debug_name, max_vertices] = |
| @@ -254,20 +277,18 @@ Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, | |||
| 254 | return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); | 277 | return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); |
| 255 | } | 278 | } |
| 256 | 279 | ||
| 257 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode, | 280 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { |
| 258 | BaseBindings base_bindings) { | ||
| 259 | GLuint handle{}; | 281 | GLuint handle{}; |
| 260 | if (program_type == Maxwell::ShaderProgram::Geometry) { | 282 | if (program_type == Maxwell::ShaderProgram::Geometry) { |
| 261 | handle = GetGeometryShader(primitive_mode, base_bindings); | 283 | handle = GetGeometryShader(variant); |
| 262 | } else { | 284 | } else { |
| 263 | const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings); | 285 | const auto [entry, is_cache_miss] = programs.try_emplace(variant); |
| 264 | auto& program = entry->second; | 286 | auto& program = entry->second; |
| 265 | if (is_cache_miss) { | 287 | if (is_cache_miss) { |
| 266 | program = TryLoadProgram(primitive_mode, base_bindings); | 288 | program = TryLoadProgram(variant); |
| 267 | if (!program) { | 289 | if (!program) { |
| 268 | program = | 290 | program = SpecializeShader(code, entries, program_type, variant); |
| 269 | SpecializeShader(code, entries, program_type, base_bindings, primitive_mode); | 291 | disk_cache.SaveUsage(GetUsage(variant)); |
| 270 | disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); | ||
| 271 | } | 292 | } |
| 272 | 293 | ||
| 273 | LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); | 294 | LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); |
| @@ -276,6 +297,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive | |||
| 276 | handle = program->handle; | 297 | handle = program->handle; |
| 277 | } | 298 | } |
| 278 | 299 | ||
| 300 | auto base_bindings{variant.base_bindings}; | ||
| 279 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS; | 301 | base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS; |
| 280 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); | 302 | base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); |
| 281 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); | 303 | base_bindings.sampler += static_cast<u32>(entries.samplers.size()); |
| @@ -283,43 +305,42 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive | |||
| 283 | return {handle, base_bindings}; | 305 | return {handle, base_bindings}; |
| 284 | } | 306 | } |
| 285 | 307 | ||
| 286 | GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) { | 308 | GLuint CachedShader::GetGeometryShader(const ProgramVariant& variant) { |
| 287 | const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings); | 309 | const auto [entry, is_cache_miss] = geometry_programs.try_emplace(variant); |
| 288 | auto& programs = entry->second; | 310 | auto& programs = entry->second; |
| 289 | 311 | ||
| 290 | switch (primitive_mode) { | 312 | switch (variant.primitive_mode) { |
| 291 | case GL_POINTS: | 313 | case GL_POINTS: |
| 292 | return LazyGeometryProgram(programs.points, base_bindings, primitive_mode); | 314 | return LazyGeometryProgram(programs.points, variant); |
| 293 | case GL_LINES: | 315 | case GL_LINES: |
| 294 | case GL_LINE_STRIP: | 316 | case GL_LINE_STRIP: |
| 295 | return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode); | 317 | return LazyGeometryProgram(programs.lines, variant); |
| 296 | case GL_LINES_ADJACENCY: | 318 | case GL_LINES_ADJACENCY: |
| 297 | case GL_LINE_STRIP_ADJACENCY: | 319 | case GL_LINE_STRIP_ADJACENCY: |
| 298 | return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode); | 320 | return LazyGeometryProgram(programs.lines_adjacency, variant); |
| 299 | case GL_TRIANGLES: | 321 | case GL_TRIANGLES: |
| 300 | case GL_TRIANGLE_STRIP: | 322 | case GL_TRIANGLE_STRIP: |
| 301 | case GL_TRIANGLE_FAN: | 323 | case GL_TRIANGLE_FAN: |
| 302 | return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode); | 324 | return LazyGeometryProgram(programs.triangles, variant); |
| 303 | case GL_TRIANGLES_ADJACENCY: | 325 | case GL_TRIANGLES_ADJACENCY: |
| 304 | case GL_TRIANGLE_STRIP_ADJACENCY: | 326 | case GL_TRIANGLE_STRIP_ADJACENCY: |
| 305 | return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode); | 327 | return LazyGeometryProgram(programs.triangles_adjacency, variant); |
| 306 | default: | 328 | default: |
| 307 | UNREACHABLE_MSG("Unknown primitive mode."); | 329 | UNREACHABLE_MSG("Unknown primitive mode."); |
| 308 | return LazyGeometryProgram(programs.points, base_bindings, primitive_mode); | 330 | return LazyGeometryProgram(programs.points, variant); |
| 309 | } | 331 | } |
| 310 | } | 332 | } |
| 311 | 333 | ||
| 312 | GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings, | 334 | GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, |
| 313 | GLenum primitive_mode) { | 335 | const ProgramVariant& variant) { |
| 314 | if (target_program) { | 336 | if (target_program) { |
| 315 | return target_program->handle; | 337 | return target_program->handle; |
| 316 | } | 338 | } |
| 317 | const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(primitive_mode); | 339 | const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(variant.primitive_mode); |
| 318 | target_program = TryLoadProgram(primitive_mode, base_bindings); | 340 | target_program = TryLoadProgram(variant); |
| 319 | if (!target_program) { | 341 | if (!target_program) { |
| 320 | target_program = | 342 | target_program = SpecializeShader(code, entries, program_type, variant); |
| 321 | SpecializeShader(code, entries, program_type, base_bindings, primitive_mode); | 343 | disk_cache.SaveUsage(GetUsage(variant)); |
| 322 | disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); | ||
| 323 | } | 344 | } |
| 324 | 345 | ||
| 325 | LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name); | 346 | LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name); |
| @@ -327,18 +348,19 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind | |||
| 327 | return target_program->handle; | 348 | return target_program->handle; |
| 328 | }; | 349 | }; |
| 329 | 350 | ||
| 330 | CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode, | 351 | CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const { |
| 331 | BaseBindings base_bindings) const { | 352 | const auto found = precompiled_programs.find(GetUsage(variant)); |
| 332 | const auto found = precompiled_programs.find(GetUsage(primitive_mode, base_bindings)); | ||
| 333 | if (found == precompiled_programs.end()) { | 353 | if (found == precompiled_programs.end()) { |
| 334 | return {}; | 354 | return {}; |
| 335 | } | 355 | } |
| 336 | return found->second; | 356 | return found->second; |
| 337 | } | 357 | } |
| 338 | 358 | ||
| 339 | ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode, | 359 | ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const { |
| 340 | BaseBindings base_bindings) const { | 360 | ShaderDiskCacheUsage usage; |
| 341 | return {unique_identifier, base_bindings, primitive_mode}; | 361 | usage.unique_identifier = unique_identifier; |
| 362 | usage.variant = variant; | ||
| 363 | return usage; | ||
| 342 | } | 364 | } |
| 343 | 365 | ||
| 344 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, | 366 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, |
| @@ -404,8 +426,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 404 | } | 426 | } |
| 405 | if (!shader) { | 427 | if (!shader) { |
| 406 | shader = SpecializeShader(unspecialized.code, unspecialized.entries, | 428 | shader = SpecializeShader(unspecialized.code, unspecialized.entries, |
| 407 | unspecialized.program_type, usage.bindings, | 429 | unspecialized.program_type, usage.variant, true); |
| 408 | usage.primitive, true); | ||
| 409 | } | 430 | } |
| 410 | 431 | ||
| 411 | std::scoped_lock lock(mutex); | 432 | std::scoped_lock lock(mutex); |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 01af9b28a..bbb53cdf4 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <atomic> | 8 | #include <atomic> |
| 9 | #include <bitset> | ||
| 9 | #include <memory> | 10 | #include <memory> |
| 10 | #include <set> | 11 | #include <set> |
| 11 | #include <tuple> | 12 | #include <tuple> |
| @@ -74,8 +75,7 @@ public: | |||
| 74 | } | 75 | } |
| 75 | 76 | ||
| 76 | /// Gets the GL program handle for the shader | 77 | /// Gets the GL program handle for the shader |
| 77 | std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode, | 78 | std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); |
| 78 | BaseBindings base_bindings); | ||
| 79 | 79 | ||
| 80 | private: | 80 | private: |
| 81 | explicit CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, | 81 | explicit CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, |
| @@ -92,15 +92,14 @@ private: | |||
| 92 | CachedProgram triangles_adjacency; | 92 | CachedProgram triangles_adjacency; |
| 93 | }; | 93 | }; |
| 94 | 94 | ||
| 95 | GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings); | 95 | GLuint GetGeometryShader(const ProgramVariant& variant); |
| 96 | 96 | ||
| 97 | /// Generates a geometry shader or returns one that already exists. | 97 | /// Generates a geometry shader or returns one that already exists. |
| 98 | GLuint LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings, | 98 | GLuint LazyGeometryProgram(CachedProgram& target_program, const ProgramVariant& variant); |
| 99 | GLenum primitive_mode); | ||
| 100 | 99 | ||
| 101 | CachedProgram TryLoadProgram(GLenum primitive_mode, BaseBindings base_bindings) const; | 100 | CachedProgram TryLoadProgram(const ProgramVariant& variant) const; |
| 102 | 101 | ||
| 103 | ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const; | 102 | ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; |
| 104 | 103 | ||
| 105 | u8* host_ptr{}; | 104 | u8* host_ptr{}; |
| 106 | VAddr cpu_addr{}; | 105 | VAddr cpu_addr{}; |
| @@ -113,8 +112,8 @@ private: | |||
| 113 | std::string code; | 112 | std::string code; |
| 114 | std::size_t shader_length{}; | 113 | std::size_t shader_length{}; |
| 115 | 114 | ||
| 116 | std::unordered_map<BaseBindings, CachedProgram> programs; | 115 | std::unordered_map<ProgramVariant, CachedProgram> programs; |
| 117 | std::unordered_map<BaseBindings, GeometryPrograms> geometry_programs; | 116 | std::unordered_map<ProgramVariant, GeometryPrograms> geometry_programs; |
| 118 | 117 | ||
| 119 | std::unordered_map<u32, GLuint> cbuf_resource_cache; | 118 | std::unordered_map<u32, GLuint> cbuf_resource_cache; |
| 120 | std::unordered_map<u32, GLuint> gmem_resource_cache; | 119 | std::unordered_map<u32, GLuint> gmem_resource_cache; |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 7dc2e0560..5f2f1510c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -180,6 +180,7 @@ public: | |||
| 180 | DeclareGlobalMemory(); | 180 | DeclareGlobalMemory(); |
| 181 | DeclareSamplers(); | 181 | DeclareSamplers(); |
| 182 | DeclarePhysicalAttributeReader(); | 182 | DeclarePhysicalAttributeReader(); |
| 183 | DeclareImages(); | ||
| 183 | 184 | ||
| 184 | code.AddLine("void execute_{}() {{", suffix); | 185 | code.AddLine("void execute_{}() {{", suffix); |
| 185 | ++code.scope; | 186 | ++code.scope; |
| @@ -234,6 +235,9 @@ public: | |||
| 234 | for (const auto& sampler : ir.GetSamplers()) { | 235 | for (const auto& sampler : ir.GetSamplers()) { |
| 235 | entries.samplers.emplace_back(sampler); | 236 | entries.samplers.emplace_back(sampler); |
| 236 | } | 237 | } |
| 238 | for (const auto& image : ir.GetImages()) { | ||
| 239 | entries.images.emplace_back(image); | ||
| 240 | } | ||
| 237 | for (const auto& gmem_pair : ir.GetGlobalMemory()) { | 241 | for (const auto& gmem_pair : ir.GetGlobalMemory()) { |
| 238 | const auto& [base, usage] = gmem_pair; | 242 | const auto& [base, usage] = gmem_pair; |
| 239 | entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, | 243 | entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, |
| @@ -453,9 +457,13 @@ private: | |||
| 453 | void DeclareSamplers() { | 457 | void DeclareSamplers() { |
| 454 | const auto& samplers = ir.GetSamplers(); | 458 | const auto& samplers = ir.GetSamplers(); |
| 455 | for (const auto& sampler : samplers) { | 459 | for (const auto& sampler : samplers) { |
| 456 | std::string sampler_type = [&sampler] { | 460 | const std::string name{GetSampler(sampler)}; |
| 461 | const std::string description{"layout (binding = SAMPLER_BINDING_" + | ||
| 462 | std::to_string(sampler.GetIndex()) + ") uniform"}; | ||
| 463 | std::string sampler_type = [&]() { | ||
| 457 | switch (sampler.GetType()) { | 464 | switch (sampler.GetType()) { |
| 458 | case Tegra::Shader::TextureType::Texture1D: | 465 | case Tegra::Shader::TextureType::Texture1D: |
| 466 | // Special cased, read below. | ||
| 459 | return "sampler1D"; | 467 | return "sampler1D"; |
| 460 | case Tegra::Shader::TextureType::Texture2D: | 468 | case Tegra::Shader::TextureType::Texture2D: |
| 461 | return "sampler2D"; | 469 | return "sampler2D"; |
| @@ -475,8 +483,19 @@ private: | |||
| 475 | sampler_type += "Shadow"; | 483 | sampler_type += "Shadow"; |
| 476 | } | 484 | } |
| 477 | 485 | ||
| 478 | code.AddLine("layout (binding = SAMPLER_BINDING_{}) uniform {} {};", sampler.GetIndex(), | 486 | if (sampler.GetType() == Tegra::Shader::TextureType::Texture1D) { |
| 479 | sampler_type, GetSampler(sampler)); | 487 | // 1D textures can be aliased to texture buffers, hide the declarations behind a |
| 488 | // preprocessor flag and use one or the other from the GPU state. This has to be | ||
| 489 | // done because shaders don't have enough information to determine the texture type. | ||
| 490 | EmitIfdefIsBuffer(sampler); | ||
| 491 | code.AddLine("{} samplerBuffer {};", description, name); | ||
| 492 | code.AddLine("#else"); | ||
| 493 | code.AddLine("{} {} {};", description, sampler_type, name); | ||
| 494 | code.AddLine("#endif"); | ||
| 495 | } else { | ||
| 496 | // The other texture types (2D, 3D and cubes) don't have this issue. | ||
| 497 | code.AddLine("{} {} {};", description, sampler_type, name); | ||
| 498 | } | ||
| 480 | } | 499 | } |
| 481 | if (!samplers.empty()) { | 500 | if (!samplers.empty()) { |
| 482 | code.AddNewLine(); | 501 | code.AddNewLine(); |
| @@ -516,6 +535,37 @@ private: | |||
| 516 | code.AddNewLine(); | 535 | code.AddNewLine(); |
| 517 | } | 536 | } |
| 518 | 537 | ||
| 538 | void DeclareImages() { | ||
| 539 | const auto& images{ir.GetImages()}; | ||
| 540 | for (const auto& image : images) { | ||
| 541 | const std::string image_type = [&]() { | ||
| 542 | switch (image.GetType()) { | ||
| 543 | case Tegra::Shader::ImageType::Texture1D: | ||
| 544 | return "image1D"; | ||
| 545 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 546 | return "bufferImage"; | ||
| 547 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 548 | return "image1DArray"; | ||
| 549 | case Tegra::Shader::ImageType::Texture2D: | ||
| 550 | return "image2D"; | ||
| 551 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 552 | return "image2DArray"; | ||
| 553 | case Tegra::Shader::ImageType::Texture3D: | ||
| 554 | return "image3D"; | ||
| 555 | default: | ||
| 556 | UNREACHABLE(); | ||
| 557 | return "image1D"; | ||
| 558 | } | ||
| 559 | }(); | ||
| 560 | code.AddLine("layout (binding = IMAGE_BINDING_{}) coherent volatile writeonly uniform " | ||
| 561 | "{} {};", | ||
| 562 | image.GetIndex(), image_type, GetImage(image)); | ||
| 563 | } | ||
| 564 | if (!images.empty()) { | ||
| 565 | code.AddNewLine(); | ||
| 566 | } | ||
| 567 | } | ||
| 568 | |||
| 519 | void VisitBlock(const NodeBlock& bb) { | 569 | void VisitBlock(const NodeBlock& bb) { |
| 520 | for (const auto& node : bb) { | 570 | for (const auto& node : bb) { |
| 521 | if (const std::string expr = Visit(node); !expr.empty()) { | 571 | if (const std::string expr = Visit(node); !expr.empty()) { |
| @@ -1439,13 +1489,61 @@ private: | |||
| 1439 | else if (next < count) | 1489 | else if (next < count) |
| 1440 | expr += ", "; | 1490 | expr += ", "; |
| 1441 | } | 1491 | } |
| 1492 | |||
| 1493 | // Store a copy of the expression without the lod to be used with texture buffers | ||
| 1494 | std::string expr_buffer = expr; | ||
| 1495 | |||
| 1442 | if (meta->lod) { | 1496 | if (meta->lod) { |
| 1443 | expr += ", "; | 1497 | expr += ", "; |
| 1444 | expr += CastOperand(Visit(meta->lod), Type::Int); | 1498 | expr += CastOperand(Visit(meta->lod), Type::Int); |
| 1445 | } | 1499 | } |
| 1446 | expr += ')'; | 1500 | expr += ')'; |
| 1501 | expr += GetSwizzle(meta->element); | ||
| 1447 | 1502 | ||
| 1448 | return expr + GetSwizzle(meta->element); | 1503 | expr_buffer += ')'; |
| 1504 | expr_buffer += GetSwizzle(meta->element); | ||
| 1505 | |||
| 1506 | const std::string tmp{code.GenerateTemporary()}; | ||
| 1507 | EmitIfdefIsBuffer(meta->sampler); | ||
| 1508 | code.AddLine("float {} = {};", tmp, expr_buffer); | ||
| 1509 | code.AddLine("#else"); | ||
| 1510 | code.AddLine("float {} = {};", tmp, expr); | ||
| 1511 | code.AddLine("#endif"); | ||
| 1512 | |||
| 1513 | return tmp; | ||
| 1514 | } | ||
| 1515 | |||
| 1516 | std::string ImageStore(Operation operation) { | ||
| 1517 | constexpr std::array<const char*, 4> constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; | ||
| 1518 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; | ||
| 1519 | |||
| 1520 | std::string expr = "imageStore("; | ||
| 1521 | expr += GetImage(meta.image); | ||
| 1522 | expr += ", "; | ||
| 1523 | |||
| 1524 | const std::size_t coords_count{operation.GetOperandsCount()}; | ||
| 1525 | expr += constructors.at(coords_count - 1); | ||
| 1526 | for (std::size_t i = 0; i < coords_count; ++i) { | ||
| 1527 | expr += VisitOperand(operation, i, Type::Int); | ||
| 1528 | if (i + 1 < coords_count) { | ||
| 1529 | expr += ", "; | ||
| 1530 | } | ||
| 1531 | } | ||
| 1532 | expr += "), "; | ||
| 1533 | |||
| 1534 | const std::size_t values_count{meta.values.size()}; | ||
| 1535 | UNIMPLEMENTED_IF(values_count != 4); | ||
| 1536 | expr += "vec4("; | ||
| 1537 | for (std::size_t i = 0; i < values_count; ++i) { | ||
| 1538 | expr += Visit(meta.values.at(i)); | ||
| 1539 | if (i + 1 < values_count) { | ||
| 1540 | expr += ", "; | ||
| 1541 | } | ||
| 1542 | } | ||
| 1543 | expr += "));"; | ||
| 1544 | |||
| 1545 | code.AddLine(expr); | ||
| 1546 | return {}; | ||
| 1449 | } | 1547 | } |
| 1450 | 1548 | ||
| 1451 | std::string Branch(Operation operation) { | 1549 | std::string Branch(Operation operation) { |
| @@ -1688,6 +1786,8 @@ private: | |||
| 1688 | &GLSLDecompiler::TextureQueryLod, | 1786 | &GLSLDecompiler::TextureQueryLod, |
| 1689 | &GLSLDecompiler::TexelFetch, | 1787 | &GLSLDecompiler::TexelFetch, |
| 1690 | 1788 | ||
| 1789 | &GLSLDecompiler::ImageStore, | ||
| 1790 | |||
| 1691 | &GLSLDecompiler::Branch, | 1791 | &GLSLDecompiler::Branch, |
| 1692 | &GLSLDecompiler::PushFlowStack, | 1792 | &GLSLDecompiler::PushFlowStack, |
| 1693 | &GLSLDecompiler::PopFlowStack, | 1793 | &GLSLDecompiler::PopFlowStack, |
| @@ -1756,6 +1856,14 @@ private: | |||
| 1756 | return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); | 1856 | return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); |
| 1757 | } | 1857 | } |
| 1758 | 1858 | ||
| 1859 | std::string GetImage(const Image& image) const { | ||
| 1860 | return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image"); | ||
| 1861 | } | ||
| 1862 | |||
| 1863 | void EmitIfdefIsBuffer(const Sampler& sampler) { | ||
| 1864 | code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex()); | ||
| 1865 | } | ||
| 1866 | |||
| 1759 | std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const { | 1867 | std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const { |
| 1760 | return fmt::format("{}_{}_{}", name, index, suffix); | 1868 | return fmt::format("{}_{}_{}", name, index, suffix); |
| 1761 | } | 1869 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index c1569e737..14d11c7fc 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -27,6 +27,7 @@ struct ShaderEntries; | |||
| 27 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 27 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 28 | using ProgramResult = std::pair<std::string, ShaderEntries>; | 28 | using ProgramResult = std::pair<std::string, ShaderEntries>; |
| 29 | using SamplerEntry = VideoCommon::Shader::Sampler; | 29 | using SamplerEntry = VideoCommon::Shader::Sampler; |
| 30 | using ImageEntry = VideoCommon::Shader::Image; | ||
| 30 | 31 | ||
| 31 | class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { | 32 | class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { |
| 32 | public: | 33 | public: |
| @@ -74,6 +75,7 @@ struct ShaderEntries { | |||
| 74 | std::vector<ConstBufferEntry> const_buffers; | 75 | std::vector<ConstBufferEntry> const_buffers; |
| 75 | std::vector<SamplerEntry> samplers; | 76 | std::vector<SamplerEntry> samplers; |
| 76 | std::vector<SamplerEntry> bindless_samplers; | 77 | std::vector<SamplerEntry> bindless_samplers; |
| 78 | std::vector<ImageEntry> images; | ||
| 77 | std::vector<GlobalMemoryEntry> global_memory_entries; | 79 | std::vector<GlobalMemoryEntry> global_memory_entries; |
| 78 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 80 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 79 | std::size_t shader_length{}; | 81 | std::size_t shader_length{}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index ee4a45ca2..10688397b 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -34,11 +34,11 @@ enum class PrecompiledEntryKind : u32 { | |||
| 34 | Dump, | 34 | Dump, |
| 35 | }; | 35 | }; |
| 36 | 36 | ||
| 37 | constexpr u32 NativeVersion = 1; | 37 | constexpr u32 NativeVersion = 4; |
| 38 | 38 | ||
| 39 | // Making sure sizes doesn't change by accident | 39 | // Making sure sizes doesn't change by accident |
| 40 | static_assert(sizeof(BaseBindings) == 12); | 40 | static_assert(sizeof(BaseBindings) == 16); |
| 41 | static_assert(sizeof(ShaderDiskCacheUsage) == 24); | 41 | static_assert(sizeof(ShaderDiskCacheUsage) == 40); |
| 42 | 42 | ||
| 43 | namespace { | 43 | namespace { |
| 44 | 44 | ||
| @@ -332,11 +332,28 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn | |||
| 332 | static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless); | 332 | static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless); |
| 333 | } | 333 | } |
| 334 | 334 | ||
| 335 | u32 images_count{}; | ||
| 336 | if (!LoadObjectFromPrecompiled(images_count)) { | ||
| 337 | return {}; | ||
| 338 | } | ||
| 339 | for (u32 i = 0; i < images_count; ++i) { | ||
| 340 | u64 offset{}; | ||
| 341 | u64 index{}; | ||
| 342 | u32 type{}; | ||
| 343 | u8 is_bindless{}; | ||
| 344 | if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || | ||
| 345 | !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless)) { | ||
| 346 | return {}; | ||
| 347 | } | ||
| 348 | entry.entries.images.emplace_back( | ||
| 349 | static_cast<std::size_t>(offset), static_cast<std::size_t>(index), | ||
| 350 | static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0); | ||
| 351 | } | ||
| 352 | |||
| 335 | u32 global_memory_count{}; | 353 | u32 global_memory_count{}; |
| 336 | if (!LoadObjectFromPrecompiled(global_memory_count)) { | 354 | if (!LoadObjectFromPrecompiled(global_memory_count)) { |
| 337 | return {}; | 355 | return {}; |
| 338 | } | 356 | } |
| 339 | |||
| 340 | for (u32 i = 0; i < global_memory_count; ++i) { | 357 | for (u32 i = 0; i < global_memory_count; ++i) { |
| 341 | u32 cbuf_index{}; | 358 | u32 cbuf_index{}; |
| 342 | u32 cbuf_offset{}; | 359 | u32 cbuf_offset{}; |
| @@ -360,7 +377,6 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn | |||
| 360 | if (!LoadObjectFromPrecompiled(shader_length)) { | 377 | if (!LoadObjectFromPrecompiled(shader_length)) { |
| 361 | return {}; | 378 | return {}; |
| 362 | } | 379 | } |
| 363 | |||
| 364 | entry.entries.shader_length = static_cast<std::size_t>(shader_length); | 380 | entry.entries.shader_length = static_cast<std::size_t>(shader_length); |
| 365 | 381 | ||
| 366 | return entry; | 382 | return entry; |
| @@ -400,6 +416,18 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: | |||
| 400 | } | 416 | } |
| 401 | } | 417 | } |
| 402 | 418 | ||
| 419 | if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) { | ||
| 420 | return false; | ||
| 421 | } | ||
| 422 | for (const auto& image : entries.images) { | ||
| 423 | if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) || | ||
| 424 | !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) || | ||
| 425 | !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) || | ||
| 426 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0))) { | ||
| 427 | return false; | ||
| 428 | } | ||
| 429 | } | ||
| 430 | |||
| 403 | if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) { | 431 | if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) { |
| 404 | return false; | 432 | return false; |
| 405 | } | 433 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index ecd72ba58..4f296dda6 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <bitset> | ||
| 7 | #include <optional> | 8 | #include <optional> |
| 8 | #include <string> | 9 | #include <string> |
| 9 | #include <tuple> | 10 | #include <tuple> |
| @@ -30,22 +31,26 @@ class IOFile; | |||
| 30 | 31 | ||
| 31 | namespace OpenGL { | 32 | namespace OpenGL { |
| 32 | 33 | ||
| 33 | using ProgramCode = std::vector<u64>; | ||
| 34 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 35 | |||
| 36 | struct ShaderDiskCacheUsage; | 34 | struct ShaderDiskCacheUsage; |
| 37 | struct ShaderDiskCacheDump; | 35 | struct ShaderDiskCacheDump; |
| 38 | 36 | ||
| 39 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; | 37 | using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; |
| 40 | 38 | ||
| 41 | /// Allocated bindings used by an OpenGL shader program | 39 | using ProgramCode = std::vector<u64>; |
| 40 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 41 | |||
| 42 | using TextureBufferUsage = std::bitset<64>; | ||
| 43 | |||
| 44 | /// Allocated bindings used by an OpenGL shader program. | ||
| 42 | struct BaseBindings { | 45 | struct BaseBindings { |
| 43 | u32 cbuf{}; | 46 | u32 cbuf{}; |
| 44 | u32 gmem{}; | 47 | u32 gmem{}; |
| 45 | u32 sampler{}; | 48 | u32 sampler{}; |
| 49 | u32 image{}; | ||
| 46 | 50 | ||
| 47 | bool operator==(const BaseBindings& rhs) const { | 51 | bool operator==(const BaseBindings& rhs) const { |
| 48 | return std::tie(cbuf, gmem, sampler) == std::tie(rhs.cbuf, rhs.gmem, rhs.sampler); | 52 | return std::tie(cbuf, gmem, sampler, image) == |
| 53 | std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image); | ||
| 49 | } | 54 | } |
| 50 | 55 | ||
| 51 | bool operator!=(const BaseBindings& rhs) const { | 56 | bool operator!=(const BaseBindings& rhs) const { |
| @@ -53,15 +58,29 @@ struct BaseBindings { | |||
| 53 | } | 58 | } |
| 54 | }; | 59 | }; |
| 55 | 60 | ||
| 56 | /// Describes how a shader is used | 61 | /// Describes the different variants a single program can be compiled. |
| 62 | struct ProgramVariant { | ||
| 63 | BaseBindings base_bindings; | ||
| 64 | GLenum primitive_mode{}; | ||
| 65 | TextureBufferUsage texture_buffer_usage{}; | ||
| 66 | |||
| 67 | bool operator==(const ProgramVariant& rhs) const { | ||
| 68 | return std::tie(base_bindings, primitive_mode, texture_buffer_usage) == | ||
| 69 | std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.texture_buffer_usage); | ||
| 70 | } | ||
| 71 | |||
| 72 | bool operator!=(const ProgramVariant& rhs) const { | ||
| 73 | return !operator==(rhs); | ||
| 74 | } | ||
| 75 | }; | ||
| 76 | |||
| 77 | /// Describes how a shader is used. | ||
| 57 | struct ShaderDiskCacheUsage { | 78 | struct ShaderDiskCacheUsage { |
| 58 | u64 unique_identifier{}; | 79 | u64 unique_identifier{}; |
| 59 | BaseBindings bindings; | 80 | ProgramVariant variant; |
| 60 | GLenum primitive{}; | ||
| 61 | 81 | ||
| 62 | bool operator==(const ShaderDiskCacheUsage& rhs) const { | 82 | bool operator==(const ShaderDiskCacheUsage& rhs) const { |
| 63 | return std::tie(unique_identifier, bindings, primitive) == | 83 | return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant); |
| 64 | std::tie(rhs.unique_identifier, rhs.bindings, rhs.primitive); | ||
| 65 | } | 84 | } |
| 66 | 85 | ||
| 67 | bool operator!=(const ShaderDiskCacheUsage& rhs) const { | 86 | bool operator!=(const ShaderDiskCacheUsage& rhs) const { |
| @@ -76,7 +95,19 @@ namespace std { | |||
| 76 | template <> | 95 | template <> |
| 77 | struct hash<OpenGL::BaseBindings> { | 96 | struct hash<OpenGL::BaseBindings> { |
| 78 | std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept { | 97 | std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept { |
| 79 | return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16; | 98 | return static_cast<std::size_t>(bindings.cbuf) ^ |
| 99 | (static_cast<std::size_t>(bindings.gmem) << 8) ^ | ||
| 100 | (static_cast<std::size_t>(bindings.sampler) << 16) ^ | ||
| 101 | (static_cast<std::size_t>(bindings.image) << 24); | ||
| 102 | } | ||
| 103 | }; | ||
| 104 | |||
| 105 | template <> | ||
| 106 | struct hash<OpenGL::ProgramVariant> { | ||
| 107 | std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { | ||
| 108 | return std::hash<OpenGL::BaseBindings>()(variant.base_bindings) ^ | ||
| 109 | std::hash<OpenGL::TextureBufferUsage>()(variant.texture_buffer_usage) ^ | ||
| 110 | (static_cast<std::size_t>(variant.primitive_mode) << 6); | ||
| 80 | } | 111 | } |
| 81 | }; | 112 | }; |
| 82 | 113 | ||
| @@ -84,7 +115,7 @@ template <> | |||
| 84 | struct hash<OpenGL::ShaderDiskCacheUsage> { | 115 | struct hash<OpenGL::ShaderDiskCacheUsage> { |
| 85 | std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { | 116 | std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { |
| 86 | return static_cast<std::size_t>(usage.unique_identifier) ^ | 117 | return static_cast<std::size_t>(usage.unique_identifier) ^ |
| 87 | std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16; | 118 | std::hash<OpenGL::ProgramVariant>()(usage.variant); |
| 88 | } | 119 | } |
| 89 | }; | 120 | }; |
| 90 | 121 | ||
| @@ -275,26 +306,17 @@ private: | |||
| 275 | return LoadArrayFromPrecompiled(&object, 1); | 306 | return LoadArrayFromPrecompiled(&object, 1); |
| 276 | } | 307 | } |
| 277 | 308 | ||
| 278 | bool LoadObjectFromPrecompiled(bool& object) { | ||
| 279 | u8 value; | ||
| 280 | const bool read_ok = LoadArrayFromPrecompiled(&value, 1); | ||
| 281 | if (!read_ok) { | ||
| 282 | return false; | ||
| 283 | } | ||
| 284 | |||
| 285 | object = value != 0; | ||
| 286 | return true; | ||
| 287 | } | ||
| 288 | |||
| 289 | // Core system | ||
| 290 | Core::System& system; | 309 | Core::System& system; |
| 291 | // Stored transferable shaders | 310 | |
| 292 | std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable; | 311 | // Stores whole precompiled cache which will be read from or saved to the precompiled chache |
| 293 | // Stores whole precompiled cache which will be read from/saved to the precompiled cache file | 312 | // file |
| 294 | FileSys::VectorVfsFile precompiled_cache_virtual_file; | 313 | FileSys::VectorVfsFile precompiled_cache_virtual_file; |
| 295 | // Stores the current offset of the precompiled cache file for IO purposes | 314 | // Stores the current offset of the precompiled cache file for IO purposes |
| 296 | std::size_t precompiled_cache_virtual_file_offset = 0; | 315 | std::size_t precompiled_cache_virtual_file_offset = 0; |
| 297 | 316 | ||
| 317 | // Stored transferable shaders | ||
| 318 | std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable; | ||
| 319 | |||
| 298 | // The cache has been loaded at boot | 320 | // The cache has been loaded at boot |
| 299 | bool tried_to_load{}; | 321 | bool tried_to_load{}; |
| 300 | }; | 322 | }; |
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index d0b14b3f6..35ba334e4 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp | |||
| @@ -15,7 +15,8 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", | |||
| 15 | 15 | ||
| 16 | namespace OpenGL { | 16 | namespace OpenGL { |
| 17 | 17 | ||
| 18 | OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent) | 18 | OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent, |
| 19 | bool use_persistent) | ||
| 19 | : buffer_size(size) { | 20 | : buffer_size(size) { |
| 20 | gl_buffer.Create(); | 21 | gl_buffer.Create(); |
| 21 | 22 | ||
| @@ -29,7 +30,7 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p | |||
| 29 | allocate_size *= 2; | 30 | allocate_size *= 2; |
| 30 | } | 31 | } |
| 31 | 32 | ||
| 32 | if (GLAD_GL_ARB_buffer_storage) { | 33 | if (use_persistent) { |
| 33 | persistent = true; | 34 | persistent = true; |
| 34 | coherent = prefer_coherent; | 35 | coherent = prefer_coherent; |
| 35 | const GLbitfield flags = | 36 | const GLbitfield flags = |
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 3d18ecb4d..f8383cbd4 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h | |||
| @@ -13,7 +13,8 @@ namespace OpenGL { | |||
| 13 | 13 | ||
| 14 | class OGLStreamBuffer : private NonCopyable { | 14 | class OGLStreamBuffer : private NonCopyable { |
| 15 | public: | 15 | public: |
| 16 | explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false); | 16 | explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false, |
| 17 | bool use_persistent = true); | ||
| 17 | ~OGLStreamBuffer(); | 18 | ~OGLStreamBuffer(); |
| 18 | 19 | ||
| 19 | GLuint GetHandle() const; | 20 | GLuint GetHandle() const; |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp new file mode 100644 index 000000000..08ae1a429 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -0,0 +1,614 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/bit_util.h" | ||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "common/microprofile.h" | ||
| 9 | #include "common/scope_exit.h" | ||
| 10 | #include "core/core.h" | ||
| 11 | #include "video_core/morton.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_state.h" | ||
| 14 | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||
| 15 | #include "video_core/renderer_opengl/utils.h" | ||
| 16 | #include "video_core/texture_cache/surface_base.h" | ||
| 17 | #include "video_core/texture_cache/texture_cache.h" | ||
| 18 | #include "video_core/textures/convert.h" | ||
| 19 | #include "video_core/textures/texture.h" | ||
| 20 | |||
| 21 | namespace OpenGL { | ||
| 22 | |||
| 23 | using Tegra::Texture::SwizzleSource; | ||
| 24 | using VideoCore::MortonSwizzleMode; | ||
| 25 | |||
| 26 | using VideoCore::Surface::ComponentType; | ||
| 27 | using VideoCore::Surface::PixelFormat; | ||
| 28 | using VideoCore::Surface::SurfaceCompression; | ||
| 29 | using VideoCore::Surface::SurfaceTarget; | ||
| 30 | using VideoCore::Surface::SurfaceType; | ||
| 31 | |||
| 32 | MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); | ||
| 33 | MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); | ||
| 34 | |||
| 35 | namespace { | ||
| 36 | |||
| 37 | struct FormatTuple { | ||
| 38 | GLint internal_format; | ||
| 39 | GLenum format; | ||
| 40 | GLenum type; | ||
| 41 | ComponentType component_type; | ||
| 42 | bool compressed; | ||
| 43 | }; | ||
| 44 | |||
| 45 | constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ | ||
| 46 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U | ||
| 47 | {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S | ||
| 48 | {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI | ||
| 49 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U | ||
| 50 | {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm, | ||
| 51 | false}, // A2B10G10R10U | ||
| 52 | {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U | ||
| 53 | {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U | ||
| 54 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI | ||
| 55 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F | ||
| 56 | {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U | ||
| 57 | {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI | ||
| 58 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, | ||
| 59 | false}, // R11FG11FB10F | ||
| 60 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI | ||
| 61 | {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 62 | true}, // DXT1 | ||
| 63 | {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 64 | true}, // DXT23 | ||
| 65 | {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 66 | true}, // DXT45 | ||
| 67 | {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1 | ||
| 68 | {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 69 | true}, // DXN2UNORM | ||
| 70 | {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM | ||
| 71 | {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 72 | true}, // BC7U | ||
| 73 | {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float, | ||
| 74 | true}, // BC6H_UF16 | ||
| 75 | {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float, | ||
| 76 | true}, // BC6H_SF16 | ||
| 77 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 | ||
| 78 | {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 | ||
| 79 | {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F | ||
| 80 | {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F | ||
| 81 | {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F | ||
| 82 | {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F | ||
| 83 | {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U | ||
| 84 | {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S | ||
| 85 | {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI | ||
| 86 | {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I | ||
| 87 | {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16 | ||
| 88 | {GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F | ||
| 89 | {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI | ||
| 90 | {GL_RG16I, GL_RG_INTEGER, GL_SHORT, ComponentType::SInt, false}, // RG16I | ||
| 91 | {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S | ||
| 92 | {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F | ||
| 93 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, | ||
| 94 | false}, // RGBA8_SRGB | ||
| 95 | {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U | ||
| 96 | {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S | ||
| 97 | {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI | ||
| 98 | {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI | ||
| 99 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8 | ||
| 100 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5 | ||
| 101 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4 | ||
| 102 | {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 | ||
| 103 | // Compressed sRGB formats | ||
| 104 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 105 | true}, // DXT1_SRGB | ||
| 106 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 107 | true}, // DXT23_SRGB | ||
| 108 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 109 | true}, // DXT45_SRGB | ||
| 110 | {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | ||
| 111 | true}, // BC7U_SRGB | ||
| 112 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB | ||
| 113 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB | ||
| 114 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB | ||
| 115 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB | ||
| 116 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5 | ||
| 117 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB | ||
| 118 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8 | ||
| 119 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB | ||
| 120 | |||
| 121 | // Depth formats | ||
| 122 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F | ||
| 123 | {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm, | ||
| 124 | false}, // Z16 | ||
| 125 | |||
| 126 | // DepthStencil formats | ||
| 127 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, | ||
| 128 | false}, // Z24S8 | ||
| 129 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, | ||
| 130 | false}, // S8Z24 | ||
| 131 | {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, | ||
| 132 | ComponentType::Float, false}, // Z32FS8 | ||
| 133 | }}; | ||
| 134 | |||
| 135 | const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { | ||
| 136 | ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); | ||
| 137 | const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]}; | ||
| 138 | ASSERT(component_type == format.component_type); | ||
| 139 | return format; | ||
| 140 | } | ||
| 141 | |||
| 142 | GLenum GetTextureTarget(const SurfaceTarget& target) { | ||
| 143 | switch (target) { | ||
| 144 | case SurfaceTarget::TextureBuffer: | ||
| 145 | return GL_TEXTURE_BUFFER; | ||
| 146 | case SurfaceTarget::Texture1D: | ||
| 147 | return GL_TEXTURE_1D; | ||
| 148 | case SurfaceTarget::Texture2D: | ||
| 149 | return GL_TEXTURE_2D; | ||
| 150 | case SurfaceTarget::Texture3D: | ||
| 151 | return GL_TEXTURE_3D; | ||
| 152 | case SurfaceTarget::Texture1DArray: | ||
| 153 | return GL_TEXTURE_1D_ARRAY; | ||
| 154 | case SurfaceTarget::Texture2DArray: | ||
| 155 | return GL_TEXTURE_2D_ARRAY; | ||
| 156 | case SurfaceTarget::TextureCubemap: | ||
| 157 | return GL_TEXTURE_CUBE_MAP; | ||
| 158 | case SurfaceTarget::TextureCubeArray: | ||
| 159 | return GL_TEXTURE_CUBE_MAP_ARRAY; | ||
| 160 | } | ||
| 161 | UNREACHABLE(); | ||
| 162 | return {}; | ||
| 163 | } | ||
| 164 | |||
| 165 | GLint GetSwizzleSource(SwizzleSource source) { | ||
| 166 | switch (source) { | ||
| 167 | case SwizzleSource::Zero: | ||
| 168 | return GL_ZERO; | ||
| 169 | case SwizzleSource::R: | ||
| 170 | return GL_RED; | ||
| 171 | case SwizzleSource::G: | ||
| 172 | return GL_GREEN; | ||
| 173 | case SwizzleSource::B: | ||
| 174 | return GL_BLUE; | ||
| 175 | case SwizzleSource::A: | ||
| 176 | return GL_ALPHA; | ||
| 177 | case SwizzleSource::OneInt: | ||
| 178 | case SwizzleSource::OneFloat: | ||
| 179 | return GL_ONE; | ||
| 180 | } | ||
| 181 | UNREACHABLE(); | ||
| 182 | return GL_NONE; | ||
| 183 | } | ||
| 184 | |||
| 185 | void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { | ||
| 186 | glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | ||
| 187 | glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | ||
| 188 | glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); | ||
| 189 | glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); | ||
| 190 | glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.num_levels - 1); | ||
| 191 | if (params.num_levels == 1) { | ||
| 192 | glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f); | ||
| 193 | } | ||
| 194 | } | ||
| 195 | |||
| 196 | OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format, | ||
| 197 | OGLBuffer& texture_buffer) { | ||
| 198 | OGLTexture texture; | ||
| 199 | texture.Create(target); | ||
| 200 | |||
| 201 | switch (params.target) { | ||
| 202 | case SurfaceTarget::Texture1D: | ||
| 203 | glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width); | ||
| 204 | break; | ||
| 205 | case SurfaceTarget::TextureBuffer: | ||
| 206 | texture_buffer.Create(); | ||
| 207 | glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(), | ||
| 208 | nullptr, GL_DYNAMIC_STORAGE_BIT); | ||
| 209 | glTextureBuffer(texture.handle, internal_format, texture_buffer.handle); | ||
| 210 | case SurfaceTarget::Texture2D: | ||
| 211 | case SurfaceTarget::TextureCubemap: | ||
| 212 | glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width, | ||
| 213 | params.height); | ||
| 214 | break; | ||
| 215 | case SurfaceTarget::Texture3D: | ||
| 216 | case SurfaceTarget::Texture2DArray: | ||
| 217 | case SurfaceTarget::TextureCubeArray: | ||
| 218 | glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width, | ||
| 219 | params.height, params.depth); | ||
| 220 | break; | ||
| 221 | default: | ||
| 222 | UNREACHABLE(); | ||
| 223 | } | ||
| 224 | |||
| 225 | ApplyTextureDefaults(params, texture.handle); | ||
| 226 | |||
| 227 | return texture; | ||
| 228 | } | ||
| 229 | |||
| 230 | } // Anonymous namespace | ||
| 231 | |||
| 232 | CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) | ||
| 233 | : VideoCommon::SurfaceBase<View>(gpu_addr, params) { | ||
| 234 | const auto& tuple{GetFormatTuple(params.pixel_format, params.component_type)}; | ||
| 235 | internal_format = tuple.internal_format; | ||
| 236 | format = tuple.format; | ||
| 237 | type = tuple.type; | ||
| 238 | is_compressed = tuple.compressed; | ||
| 239 | target = GetTextureTarget(params.target); | ||
| 240 | texture = CreateTexture(params, target, internal_format, texture_buffer); | ||
| 241 | DecorateSurfaceName(); | ||
| 242 | main_view = CreateViewInner( | ||
| 243 | ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels), | ||
| 244 | true); | ||
| 245 | } | ||
| 246 | |||
| 247 | CachedSurface::~CachedSurface() = default; | ||
| 248 | |||
| 249 | void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { | ||
| 250 | MICROPROFILE_SCOPE(OpenGL_Texture_Download); | ||
| 251 | |||
| 252 | SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); | ||
| 253 | |||
| 254 | for (u32 level = 0; level < params.emulated_levels; ++level) { | ||
| 255 | glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); | ||
| 256 | glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); | ||
| 257 | const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); | ||
| 258 | if (is_compressed) { | ||
| 259 | glGetCompressedTextureImage(texture.handle, level, | ||
| 260 | static_cast<GLsizei>(params.GetHostMipmapSize(level)), | ||
| 261 | staging_buffer.data() + mip_offset); | ||
| 262 | } else { | ||
| 263 | glGetTextureImage(texture.handle, level, format, type, | ||
| 264 | static_cast<GLsizei>(params.GetHostMipmapSize(level)), | ||
| 265 | staging_buffer.data() + mip_offset); | ||
| 266 | } | ||
| 267 | } | ||
| 268 | } | ||
| 269 | |||
| 270 | void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { | ||
| 271 | MICROPROFILE_SCOPE(OpenGL_Texture_Upload); | ||
| 272 | SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); | ||
| 273 | for (u32 level = 0; level < params.emulated_levels; ++level) { | ||
| 274 | UploadTextureMipmap(level, staging_buffer); | ||
| 275 | } | ||
| 276 | } | ||
| 277 | |||
| 278 | void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) { | ||
| 279 | glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); | ||
| 280 | glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); | ||
| 281 | |||
| 282 | auto compression_type = params.GetCompressionType(); | ||
| 283 | |||
| 284 | const std::size_t mip_offset = compression_type == SurfaceCompression::Converted | ||
| 285 | ? params.GetConvertedMipmapOffset(level) | ||
| 286 | : params.GetHostMipmapLevelOffset(level); | ||
| 287 | const u8* buffer{staging_buffer.data() + mip_offset}; | ||
| 288 | if (is_compressed) { | ||
| 289 | const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))}; | ||
| 290 | switch (params.target) { | ||
| 291 | case SurfaceTarget::Texture2D: | ||
| 292 | glCompressedTextureSubImage2D(texture.handle, level, 0, 0, | ||
| 293 | static_cast<GLsizei>(params.GetMipWidth(level)), | ||
| 294 | static_cast<GLsizei>(params.GetMipHeight(level)), | ||
| 295 | internal_format, image_size, buffer); | ||
| 296 | break; | ||
| 297 | case SurfaceTarget::Texture3D: | ||
| 298 | case SurfaceTarget::Texture2DArray: | ||
| 299 | case SurfaceTarget::TextureCubeArray: | ||
| 300 | glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0, | ||
| 301 | static_cast<GLsizei>(params.GetMipWidth(level)), | ||
| 302 | static_cast<GLsizei>(params.GetMipHeight(level)), | ||
| 303 | static_cast<GLsizei>(params.GetMipDepth(level)), | ||
| 304 | internal_format, image_size, buffer); | ||
| 305 | break; | ||
| 306 | case SurfaceTarget::TextureCubemap: { | ||
| 307 | const std::size_t layer_size{params.GetHostLayerSize(level)}; | ||
| 308 | for (std::size_t face = 0; face < params.depth; ++face) { | ||
| 309 | glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face), | ||
| 310 | static_cast<GLsizei>(params.GetMipWidth(level)), | ||
| 311 | static_cast<GLsizei>(params.GetMipHeight(level)), 1, | ||
| 312 | internal_format, static_cast<GLsizei>(layer_size), | ||
| 313 | buffer); | ||
| 314 | buffer += layer_size; | ||
| 315 | } | ||
| 316 | break; | ||
| 317 | } | ||
| 318 | default: | ||
| 319 | UNREACHABLE(); | ||
| 320 | } | ||
| 321 | } else { | ||
| 322 | switch (params.target) { | ||
| 323 | case SurfaceTarget::Texture1D: | ||
| 324 | glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type, | ||
| 325 | buffer); | ||
| 326 | break; | ||
| 327 | case SurfaceTarget::TextureBuffer: | ||
| 328 | ASSERT(level == 0); | ||
| 329 | glNamedBufferSubData(texture_buffer.handle, 0, | ||
| 330 | params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer); | ||
| 331 | break; | ||
| 332 | case SurfaceTarget::Texture1DArray: | ||
| 333 | case SurfaceTarget::Texture2D: | ||
| 334 | glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level), | ||
| 335 | params.GetMipHeight(level), format, type, buffer); | ||
| 336 | break; | ||
| 337 | case SurfaceTarget::Texture3D: | ||
| 338 | case SurfaceTarget::Texture2DArray: | ||
| 339 | case SurfaceTarget::TextureCubeArray: | ||
| 340 | glTextureSubImage3D( | ||
| 341 | texture.handle, level, 0, 0, 0, static_cast<GLsizei>(params.GetMipWidth(level)), | ||
| 342 | static_cast<GLsizei>(params.GetMipHeight(level)), | ||
| 343 | static_cast<GLsizei>(params.GetMipDepth(level)), format, type, buffer); | ||
| 344 | break; | ||
| 345 | case SurfaceTarget::TextureCubemap: | ||
| 346 | for (std::size_t face = 0; face < params.depth; ++face) { | ||
| 347 | glTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face), | ||
| 348 | params.GetMipWidth(level), params.GetMipHeight(level), 1, | ||
| 349 | format, type, buffer); | ||
| 350 | buffer += params.GetHostLayerSize(level); | ||
| 351 | } | ||
| 352 | break; | ||
| 353 | default: | ||
| 354 | UNREACHABLE(); | ||
| 355 | } | ||
| 356 | } | ||
| 357 | } | ||
| 358 | |||
| 359 | void CachedSurface::DecorateSurfaceName() { | ||
| 360 | LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName()); | ||
| 361 | } | ||
| 362 | |||
| 363 | void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) { | ||
| 364 | LabelGLObject(GL_TEXTURE, texture_view.handle, gpu_addr, prefix); | ||
| 365 | } | ||
| 366 | |||
| 367 | View CachedSurface::CreateView(const ViewParams& view_key) { | ||
| 368 | return CreateViewInner(view_key, false); | ||
| 369 | } | ||
| 370 | |||
| 371 | View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) { | ||
| 372 | auto view = std::make_shared<CachedSurfaceView>(*this, view_key, is_proxy); | ||
| 373 | views[view_key] = view; | ||
| 374 | if (!is_proxy) | ||
| 375 | view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++)); | ||
| 376 | return view; | ||
| 377 | } | ||
| 378 | |||
| 379 | CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params, | ||
| 380 | const bool is_proxy) | ||
| 381 | : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} { | ||
| 382 | target = GetTextureTarget(params.target); | ||
| 383 | if (!is_proxy) { | ||
| 384 | texture_view = CreateTextureView(); | ||
| 385 | } | ||
| 386 | swizzle = EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A); | ||
| 387 | } | ||
| 388 | |||
| 389 | CachedSurfaceView::~CachedSurfaceView() = default; | ||
| 390 | |||
| 391 | void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { | ||
| 392 | ASSERT(params.num_layers == 1 && params.num_levels == 1); | ||
| 393 | |||
| 394 | const auto& owner_params = surface.GetSurfaceParams(); | ||
| 395 | |||
| 396 | switch (owner_params.target) { | ||
| 397 | case SurfaceTarget::Texture1D: | ||
| 398 | glFramebufferTexture1D(target, attachment, surface.GetTarget(), surface.GetTexture(), | ||
| 399 | params.base_level); | ||
| 400 | break; | ||
| 401 | case SurfaceTarget::Texture2D: | ||
| 402 | glFramebufferTexture2D(target, attachment, surface.GetTarget(), surface.GetTexture(), | ||
| 403 | params.base_level); | ||
| 404 | break; | ||
| 405 | case SurfaceTarget::Texture1DArray: | ||
| 406 | case SurfaceTarget::Texture2DArray: | ||
| 407 | case SurfaceTarget::TextureCubemap: | ||
| 408 | case SurfaceTarget::TextureCubeArray: | ||
| 409 | glFramebufferTextureLayer(target, attachment, surface.GetTexture(), params.base_level, | ||
| 410 | params.base_layer); | ||
| 411 | break; | ||
| 412 | default: | ||
| 413 | UNIMPLEMENTED(); | ||
| 414 | } | ||
| 415 | } | ||
| 416 | |||
| 417 | void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source, | ||
| 418 | SwizzleSource z_source, SwizzleSource w_source) { | ||
| 419 | u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); | ||
| 420 | if (new_swizzle == swizzle) | ||
| 421 | return; | ||
| 422 | swizzle = new_swizzle; | ||
| 423 | const std::array<GLint, 4> gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), | ||
| 424 | GetSwizzleSource(z_source), | ||
| 425 | GetSwizzleSource(w_source)}; | ||
| 426 | const GLuint handle = GetTexture(); | ||
| 427 | glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); | ||
| 428 | } | ||
| 429 | |||
| 430 | OGLTextureView CachedSurfaceView::CreateTextureView() const { | ||
| 431 | const auto& owner_params = surface.GetSurfaceParams(); | ||
| 432 | OGLTextureView texture_view; | ||
| 433 | texture_view.Create(); | ||
| 434 | |||
| 435 | const GLuint handle{texture_view.handle}; | ||
| 436 | const FormatTuple& tuple{ | ||
| 437 | GetFormatTuple(owner_params.pixel_format, owner_params.component_type)}; | ||
| 438 | |||
| 439 | glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level, | ||
| 440 | params.num_levels, params.base_layer, params.num_layers); | ||
| 441 | |||
| 442 | ApplyTextureDefaults(owner_params, handle); | ||
| 443 | |||
| 444 | return texture_view; | ||
| 445 | } | ||
| 446 | |||
| 447 | TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, | ||
| 448 | VideoCore::RasterizerInterface& rasterizer, | ||
| 449 | const Device& device) | ||
| 450 | : TextureCacheBase{system, rasterizer} { | ||
| 451 | src_framebuffer.Create(); | ||
| 452 | dst_framebuffer.Create(); | ||
| 453 | } | ||
| 454 | |||
| 455 | TextureCacheOpenGL::~TextureCacheOpenGL() = default; | ||
| 456 | |||
| 457 | Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { | ||
| 458 | return std::make_shared<CachedSurface>(gpu_addr, params); | ||
| 459 | } | ||
| 460 | |||
| 461 | void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface, | ||
| 462 | const VideoCommon::CopyParams& copy_params) { | ||
| 463 | const auto& src_params = src_surface->GetSurfaceParams(); | ||
| 464 | const auto& dst_params = dst_surface->GetSurfaceParams(); | ||
| 465 | if (src_params.type != dst_params.type) { | ||
| 466 | // A fallback is needed | ||
| 467 | return; | ||
| 468 | } | ||
| 469 | const auto src_handle = src_surface->GetTexture(); | ||
| 470 | const auto src_target = src_surface->GetTarget(); | ||
| 471 | const auto dst_handle = dst_surface->GetTexture(); | ||
| 472 | const auto dst_target = dst_surface->GetTarget(); | ||
| 473 | glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x, | ||
| 474 | copy_params.source_y, copy_params.source_z, dst_handle, dst_target, | ||
| 475 | copy_params.dest_level, copy_params.dest_x, copy_params.dest_y, | ||
| 476 | copy_params.dest_z, copy_params.width, copy_params.height, | ||
| 477 | copy_params.depth); | ||
| 478 | } | ||
| 479 | |||
| 480 | void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, | ||
| 481 | const Tegra::Engines::Fermi2D::Config& copy_config) { | ||
| 482 | const auto& src_params{src_view->GetSurfaceParams()}; | ||
| 483 | const auto& dst_params{dst_view->GetSurfaceParams()}; | ||
| 484 | |||
| 485 | OpenGLState prev_state{OpenGLState::GetCurState()}; | ||
| 486 | SCOPE_EXIT({ prev_state.Apply(); }); | ||
| 487 | |||
| 488 | OpenGLState state; | ||
| 489 | state.draw.read_framebuffer = src_framebuffer.handle; | ||
| 490 | state.draw.draw_framebuffer = dst_framebuffer.handle; | ||
| 491 | state.Apply(); | ||
| 492 | |||
| 493 | u32 buffers{}; | ||
| 494 | |||
| 495 | UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D); | ||
| 496 | UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D); | ||
| 497 | |||
| 498 | if (src_params.type == SurfaceType::ColorTexture) { | ||
| 499 | src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER); | ||
| 500 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||
| 501 | 0); | ||
| 502 | |||
| 503 | dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER); | ||
| 504 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, | ||
| 505 | 0); | ||
| 506 | |||
| 507 | buffers = GL_COLOR_BUFFER_BIT; | ||
| 508 | } else if (src_params.type == SurfaceType::Depth) { | ||
| 509 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 510 | src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER); | ||
| 511 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 512 | |||
| 513 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 514 | dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER); | ||
| 515 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); | ||
| 516 | |||
| 517 | buffers = GL_DEPTH_BUFFER_BIT; | ||
| 518 | } else if (src_params.type == SurfaceType::DepthStencil) { | ||
| 519 | glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 520 | src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER); | ||
| 521 | |||
| 522 | glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); | ||
| 523 | dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER); | ||
| 524 | |||
| 525 | buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; | ||
| 526 | } | ||
| 527 | |||
| 528 | const Common::Rectangle<u32>& src_rect = copy_config.src_rect; | ||
| 529 | const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect; | ||
| 530 | const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; | ||
| 531 | |||
| 532 | glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, | ||
| 533 | dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, | ||
| 534 | is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST); | ||
| 535 | } | ||
| 536 | |||
| 537 | void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { | ||
| 538 | const auto& src_params = src_surface->GetSurfaceParams(); | ||
| 539 | const auto& dst_params = dst_surface->GetSurfaceParams(); | ||
| 540 | UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); | ||
| 541 | |||
| 542 | const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type); | ||
| 543 | const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type); | ||
| 544 | |||
| 545 | const std::size_t source_size = src_surface->GetHostSizeInBytes(); | ||
| 546 | const std::size_t dest_size = dst_surface->GetHostSizeInBytes(); | ||
| 547 | |||
| 548 | const std::size_t buffer_size = std::max(source_size, dest_size); | ||
| 549 | |||
| 550 | GLuint copy_pbo_handle = FetchPBO(buffer_size); | ||
| 551 | |||
| 552 | glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); | ||
| 553 | |||
| 554 | if (source_format.compressed) { | ||
| 555 | glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size), | ||
| 556 | nullptr); | ||
| 557 | } else { | ||
| 558 | glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type, | ||
| 559 | static_cast<GLsizei>(source_size), nullptr); | ||
| 560 | } | ||
| 561 | glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); | ||
| 562 | |||
| 563 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle); | ||
| 564 | |||
| 565 | const GLsizei width = static_cast<GLsizei>(dst_params.width); | ||
| 566 | const GLsizei height = static_cast<GLsizei>(dst_params.height); | ||
| 567 | const GLsizei depth = static_cast<GLsizei>(dst_params.depth); | ||
| 568 | if (dest_format.compressed) { | ||
| 569 | LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!"); | ||
| 570 | UNREACHABLE(); | ||
| 571 | } else { | ||
| 572 | switch (dst_params.target) { | ||
| 573 | case SurfaceTarget::Texture1D: | ||
| 574 | glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format, | ||
| 575 | dest_format.type, nullptr); | ||
| 576 | break; | ||
| 577 | case SurfaceTarget::Texture2D: | ||
| 578 | glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height, | ||
| 579 | dest_format.format, dest_format.type, nullptr); | ||
| 580 | break; | ||
| 581 | case SurfaceTarget::Texture3D: | ||
| 582 | case SurfaceTarget::Texture2DArray: | ||
| 583 | case SurfaceTarget::TextureCubeArray: | ||
| 584 | glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, | ||
| 585 | dest_format.format, dest_format.type, nullptr); | ||
| 586 | break; | ||
| 587 | case SurfaceTarget::TextureCubemap: | ||
| 588 | glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth, | ||
| 589 | dest_format.format, dest_format.type, nullptr); | ||
| 590 | break; | ||
| 591 | default: | ||
| 592 | LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", | ||
| 593 | static_cast<u32>(dst_params.target)); | ||
| 594 | UNREACHABLE(); | ||
| 595 | } | ||
| 596 | } | ||
| 597 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); | ||
| 598 | |||
| 599 | glTextureBarrier(); | ||
| 600 | } | ||
| 601 | |||
| 602 | GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) { | ||
| 603 | ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; }); | ||
| 604 | const u32 l2 = Common::Log2Ceil64(static_cast<u64>(buffer_size)); | ||
| 605 | OGLBuffer& cp = copy_pbo_cache[l2]; | ||
| 606 | if (cp.handle == 0) { | ||
| 607 | const std::size_t ceil_size = 1ULL << l2; | ||
| 608 | cp.Create(); | ||
| 609 | cp.MakeStreamCopy(ceil_size); | ||
| 610 | } | ||
| 611 | return cp.handle; | ||
| 612 | } | ||
| 613 | |||
| 614 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h new file mode 100644 index 000000000..ff6ab6988 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -0,0 +1,143 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <functional> | ||
| 9 | #include <memory> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <utility> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include <glad/glad.h> | ||
| 15 | |||
| 16 | #include "common/common_types.h" | ||
| 17 | #include "video_core/engines/shader_bytecode.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 20 | #include "video_core/texture_cache/texture_cache.h" | ||
| 21 | |||
| 22 | namespace OpenGL { | ||
| 23 | |||
| 24 | using VideoCommon::SurfaceParams; | ||
| 25 | using VideoCommon::ViewParams; | ||
| 26 | |||
| 27 | class CachedSurfaceView; | ||
| 28 | class CachedSurface; | ||
| 29 | class TextureCacheOpenGL; | ||
| 30 | |||
| 31 | using Surface = std::shared_ptr<CachedSurface>; | ||
| 32 | using View = std::shared_ptr<CachedSurfaceView>; | ||
| 33 | using TextureCacheBase = VideoCommon::TextureCache<Surface, View>; | ||
| 34 | |||
| 35 | class CachedSurface final : public VideoCommon::SurfaceBase<View> { | ||
| 36 | friend CachedSurfaceView; | ||
| 37 | |||
| 38 | public: | ||
| 39 | explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params); | ||
| 40 | ~CachedSurface(); | ||
| 41 | |||
| 42 | void UploadTexture(const std::vector<u8>& staging_buffer) override; | ||
| 43 | void DownloadTexture(std::vector<u8>& staging_buffer) override; | ||
| 44 | |||
| 45 | GLenum GetTarget() const { | ||
| 46 | return target; | ||
| 47 | } | ||
| 48 | |||
| 49 | GLuint GetTexture() const { | ||
| 50 | return texture.handle; | ||
| 51 | } | ||
| 52 | |||
| 53 | protected: | ||
| 54 | void DecorateSurfaceName(); | ||
| 55 | |||
| 56 | View CreateView(const ViewParams& view_key) override; | ||
| 57 | View CreateViewInner(const ViewParams& view_key, bool is_proxy); | ||
| 58 | |||
| 59 | private: | ||
| 60 | void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer); | ||
| 61 | |||
| 62 | GLenum internal_format{}; | ||
| 63 | GLenum format{}; | ||
| 64 | GLenum type{}; | ||
| 65 | bool is_compressed{}; | ||
| 66 | GLenum target{}; | ||
| 67 | u32 view_count{}; | ||
| 68 | |||
| 69 | OGLTexture texture; | ||
| 70 | OGLBuffer texture_buffer; | ||
| 71 | }; | ||
| 72 | |||
| 73 | class CachedSurfaceView final : public VideoCommon::ViewBase { | ||
| 74 | public: | ||
| 75 | explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy); | ||
| 76 | ~CachedSurfaceView(); | ||
| 77 | |||
| 78 | /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER | ||
| 79 | void Attach(GLenum attachment, GLenum target) const; | ||
| 80 | |||
| 81 | GLuint GetTexture() const { | ||
| 82 | if (is_proxy) { | ||
| 83 | return surface.GetTexture(); | ||
| 84 | } | ||
| 85 | return texture_view.handle; | ||
| 86 | } | ||
| 87 | |||
| 88 | const SurfaceParams& GetSurfaceParams() const { | ||
| 89 | return surface.GetSurfaceParams(); | ||
| 90 | } | ||
| 91 | |||
| 92 | void ApplySwizzle(Tegra::Texture::SwizzleSource x_source, | ||
| 93 | Tegra::Texture::SwizzleSource y_source, | ||
| 94 | Tegra::Texture::SwizzleSource z_source, | ||
| 95 | Tegra::Texture::SwizzleSource w_source); | ||
| 96 | |||
| 97 | void DecorateViewName(GPUVAddr gpu_addr, std::string prefix); | ||
| 98 | |||
| 99 | private: | ||
| 100 | u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, | ||
| 101 | Tegra::Texture::SwizzleSource y_source, | ||
| 102 | Tegra::Texture::SwizzleSource z_source, | ||
| 103 | Tegra::Texture::SwizzleSource w_source) const { | ||
| 104 | return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | | ||
| 105 | (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); | ||
| 106 | } | ||
| 107 | |||
| 108 | OGLTextureView CreateTextureView() const; | ||
| 109 | |||
| 110 | CachedSurface& surface; | ||
| 111 | GLenum target{}; | ||
| 112 | |||
| 113 | OGLTextureView texture_view; | ||
| 114 | u32 swizzle; | ||
| 115 | bool is_proxy; | ||
| 116 | }; | ||
| 117 | |||
| 118 | class TextureCacheOpenGL final : public TextureCacheBase { | ||
| 119 | public: | ||
| 120 | explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||
| 121 | const Device& device); | ||
| 122 | ~TextureCacheOpenGL(); | ||
| 123 | |||
| 124 | protected: | ||
| 125 | Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; | ||
| 126 | |||
| 127 | void ImageCopy(Surface& src_surface, Surface& dst_surface, | ||
| 128 | const VideoCommon::CopyParams& copy_params) override; | ||
| 129 | |||
| 130 | void ImageBlit(View& src_view, View& dst_view, | ||
| 131 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | ||
| 132 | |||
| 133 | void BufferCopy(Surface& src_surface, Surface& dst_surface) override; | ||
| 134 | |||
| 135 | private: | ||
| 136 | GLuint FetchPBO(std::size_t buffer_size); | ||
| 137 | |||
| 138 | OGLFramebuffer src_framebuffer; | ||
| 139 | OGLFramebuffer dst_framebuffer; | ||
| 140 | std::unordered_map<u32, OGLBuffer> copy_pbo_cache; | ||
| 141 | }; | ||
| 142 | |||
| 143 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index aafd6f31b..b142521ec 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -471,7 +471,6 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum | |||
| 471 | } | 471 | } |
| 472 | } | 472 | } |
| 473 | 473 | ||
| 474 | /// Initialize the renderer | ||
| 475 | bool RendererOpenGL::Init() { | 474 | bool RendererOpenGL::Init() { |
| 476 | Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window}; | 475 | Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window}; |
| 477 | 476 | ||
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index f23fc9f9d..68c36988d 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp | |||
| @@ -5,8 +5,10 @@ | |||
| 5 | #include <string> | 5 | #include <string> |
| 6 | #include <fmt/format.h> | 6 | #include <fmt/format.h> |
| 7 | #include <glad/glad.h> | 7 | #include <glad/glad.h> |
| 8 | |||
| 8 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "common/scope_exit.h" | ||
| 10 | #include "video_core/renderer_opengl/utils.h" | 12 | #include "video_core/renderer_opengl/utils.h" |
| 11 | 13 | ||
| 12 | namespace OpenGL { | 14 | namespace OpenGL { |
| @@ -63,4 +65,4 @@ void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_vie | |||
| 63 | glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str())); | 65 | glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str())); |
| 64 | } | 66 | } |
| 65 | 67 | ||
| 66 | } // namespace OpenGL \ No newline at end of file | 68 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index b3e9fc499..4a752f3b4 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h | |||
| @@ -32,4 +32,4 @@ private: | |||
| 32 | 32 | ||
| 33 | void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); | 33 | void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); |
| 34 | 34 | ||
| 35 | } // namespace OpenGL \ No newline at end of file | 35 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 33ad9764a..97ce214b1 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -935,6 +935,11 @@ private: | |||
| 935 | return {}; | 935 | return {}; |
| 936 | } | 936 | } |
| 937 | 937 | ||
| 938 | Id ImageStore(Operation operation) { | ||
| 939 | UNIMPLEMENTED(); | ||
| 940 | return {}; | ||
| 941 | } | ||
| 942 | |||
| 938 | Id Branch(Operation operation) { | 943 | Id Branch(Operation operation) { |
| 939 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); | 944 | const auto target = std::get_if<ImmediateNode>(&*operation[0]); |
| 940 | UNIMPLEMENTED_IF(!target); | 945 | UNIMPLEMENTED_IF(!target); |
| @@ -1326,6 +1331,8 @@ private: | |||
| 1326 | &SPIRVDecompiler::TextureQueryLod, | 1331 | &SPIRVDecompiler::TextureQueryLod, |
| 1327 | &SPIRVDecompiler::TexelFetch, | 1332 | &SPIRVDecompiler::TexelFetch, |
| 1328 | 1333 | ||
| 1334 | &SPIRVDecompiler::ImageStore, | ||
| 1335 | |||
| 1329 | &SPIRVDecompiler::Branch, | 1336 | &SPIRVDecompiler::Branch, |
| 1330 | &SPIRVDecompiler::PushFlowStack, | 1337 | &SPIRVDecompiler::PushFlowStack, |
| 1331 | &SPIRVDecompiler::PopFlowStack, | 1338 | &SPIRVDecompiler::PopFlowStack, |
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index a0554c97e..2c9ff28f2 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -169,6 +169,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | |||
| 169 | {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, | 169 | {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, |
| 170 | {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, | 170 | {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, |
| 171 | {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, | 171 | {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, |
| 172 | {OpCode::Type::Image, &ShaderIR::DecodeImage}, | ||
| 172 | {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, | 173 | {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, |
| 173 | {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, | 174 | {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, |
| 174 | {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, | 175 | {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, |
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp new file mode 100644 index 000000000..24f022cc0 --- /dev/null +++ b/src/video_core/shader/decode/image.cpp | |||
| @@ -0,0 +1,120 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <vector> | ||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "video_core/engines/shader_bytecode.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | ||
| 16 | |||
| 17 | namespace VideoCommon::Shader { | ||
| 18 | |||
| 19 | using Tegra::Shader::Instruction; | ||
| 20 | using Tegra::Shader::OpCode; | ||
| 21 | |||
| 22 | namespace { | ||
| 23 | std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { | ||
| 24 | switch (image_type) { | ||
| 25 | case Tegra::Shader::ImageType::Texture1D: | ||
| 26 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 27 | return 1; | ||
| 28 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 29 | case Tegra::Shader::ImageType::Texture2D: | ||
| 30 | return 2; | ||
| 31 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 32 | case Tegra::Shader::ImageType::Texture3D: | ||
| 33 | return 3; | ||
| 34 | } | ||
| 35 | UNREACHABLE(); | ||
| 36 | return 1; | ||
| 37 | } | ||
| 38 | } // Anonymous namespace | ||
| 39 | |||
| 40 | u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | ||
| 41 | const Instruction instr = {program_code[pc]}; | ||
| 42 | const auto opcode = OpCode::Decode(instr); | ||
| 43 | |||
| 44 | switch (opcode->get().GetId()) { | ||
| 45 | case OpCode::Id::SUST: { | ||
| 46 | UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P); | ||
| 47 | UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer); | ||
| 48 | UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); | ||
| 49 | UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store | ||
| 50 | |||
| 51 | std::vector<Node> values; | ||
| 52 | constexpr std::size_t hardcoded_size{4}; | ||
| 53 | for (std::size_t i = 0; i < hardcoded_size; ++i) { | ||
| 54 | values.push_back(GetRegister(instr.gpr0.Value() + i)); | ||
| 55 | } | ||
| 56 | |||
| 57 | std::vector<Node> coords; | ||
| 58 | const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)}; | ||
| 59 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 60 | coords.push_back(GetRegister(instr.gpr8.Value() + i)); | ||
| 61 | } | ||
| 62 | |||
| 63 | const auto type{instr.sust.image_type}; | ||
| 64 | const auto& image{instr.sust.is_immediate ? GetImage(instr.image, type) | ||
| 65 | : GetBindlessImage(instr.gpr39, type)}; | ||
| 66 | MetaImage meta{image, values}; | ||
| 67 | const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))}; | ||
| 68 | bb.push_back(store); | ||
| 69 | break; | ||
| 70 | } | ||
| 71 | default: | ||
| 72 | UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); | ||
| 73 | } | ||
| 74 | |||
| 75 | return pc; | ||
| 76 | } | ||
| 77 | |||
| 78 | const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { | ||
| 79 | const auto offset{static_cast<std::size_t>(image.index.Value())}; | ||
| 80 | |||
| 81 | // If this image has already been used, return the existing mapping. | ||
| 82 | const auto itr{std::find_if(used_images.begin(), used_images.end(), | ||
| 83 | [=](const Image& entry) { return entry.GetOffset() == offset; })}; | ||
| 84 | if (itr != used_images.end()) { | ||
| 85 | ASSERT(itr->GetType() == type); | ||
| 86 | return *itr; | ||
| 87 | } | ||
| 88 | |||
| 89 | // Otherwise create a new mapping for this image. | ||
| 90 | const std::size_t next_index{used_images.size()}; | ||
| 91 | const Image entry{offset, next_index, type}; | ||
| 92 | return *used_images.emplace(entry).first; | ||
| 93 | } | ||
| 94 | |||
| 95 | const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, | ||
| 96 | Tegra::Shader::ImageType type) { | ||
| 97 | const Node image_register{GetRegister(reg)}; | ||
| 98 | const Node base_image{ | ||
| 99 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; | ||
| 100 | const auto cbuf{std::get_if<CbufNode>(&*base_image)}; | ||
| 101 | const auto cbuf_offset_imm{std::get_if<ImmediateNode>(&*cbuf->GetOffset())}; | ||
| 102 | const auto cbuf_offset{cbuf_offset_imm->GetValue()}; | ||
| 103 | const auto cbuf_index{cbuf->GetIndex()}; | ||
| 104 | const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; | ||
| 105 | |||
| 106 | // If this image has already been used, return the existing mapping. | ||
| 107 | const auto itr{std::find_if(used_images.begin(), used_images.end(), | ||
| 108 | [=](const Image& entry) { return entry.GetOffset() == cbuf_key; })}; | ||
| 109 | if (itr != used_images.end()) { | ||
| 110 | ASSERT(itr->GetType() == type); | ||
| 111 | return *itr; | ||
| 112 | } | ||
| 113 | |||
| 114 | // Otherwise create a new mapping for this image. | ||
| 115 | const std::size_t next_index{used_images.size()}; | ||
| 116 | const Image entry{cbuf_index, cbuf_offset, next_index, type}; | ||
| 117 | return *used_images.emplace(entry).first; | ||
| 118 | } | ||
| 119 | |||
| 120 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 4a356dbd4..cb480be9b 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -245,6 +245,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 245 | } | 245 | } |
| 246 | break; | 246 | break; |
| 247 | } | 247 | } |
| 248 | case OpCode::Id::TLD: { | ||
| 249 | UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented"); | ||
| 250 | UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented"); | ||
| 251 | UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented"); | ||
| 252 | |||
| 253 | if (instr.tld.nodep_flag) { | ||
| 254 | LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete"); | ||
| 255 | } | ||
| 256 | |||
| 257 | WriteTexInstructionFloat(bb, instr, GetTldCode(instr)); | ||
| 258 | break; | ||
| 259 | } | ||
| 248 | case OpCode::Id::TLDS: { | 260 | case OpCode::Id::TLDS: { |
| 249 | const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; | 261 | const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; |
| 250 | const bool is_array{instr.tlds.IsArrayTexture()}; | 262 | const bool is_array{instr.tlds.IsArrayTexture()}; |
| @@ -575,6 +587,39 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||
| 575 | return values; | 587 | return values; |
| 576 | } | 588 | } |
| 577 | 589 | ||
| 590 | Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { | ||
| 591 | const auto texture_type{instr.tld.texture_type}; | ||
| 592 | const bool is_array{instr.tld.is_array}; | ||
| 593 | const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL}; | ||
| 594 | const std::size_t coord_count{GetCoordCount(texture_type)}; | ||
| 595 | |||
| 596 | u64 gpr8_cursor{instr.gpr8.Value()}; | ||
| 597 | const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr}; | ||
| 598 | |||
| 599 | std::vector<Node> coords; | ||
| 600 | coords.reserve(coord_count); | ||
| 601 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 602 | coords.push_back(GetRegister(gpr8_cursor++)); | ||
| 603 | } | ||
| 604 | |||
| 605 | u64 gpr20_cursor{instr.gpr20.Value()}; | ||
| 606 | // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr}; | ||
| 607 | const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)}; | ||
| 608 | // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; | ||
| 609 | // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; | ||
| 610 | |||
| 611 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 612 | |||
| 613 | Node4 values; | ||
| 614 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 615 | auto coords_copy = coords; | ||
| 616 | MetaTexture meta{sampler, array_register, {}, {}, {}, lod, {}, element}; | ||
| 617 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | ||
| 618 | } | ||
| 619 | |||
| 620 | return values; | ||
| 621 | } | ||
| 622 | |||
| 578 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { | 623 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { |
| 579 | const std::size_t type_coord_count = GetCoordCount(texture_type); | 624 | const std::size_t type_coord_count = GetCoordCount(texture_type); |
| 580 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | 625 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 3cfb911bb..0ac83fcf0 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -146,6 +146,8 @@ enum class OperationCode { | |||
| 146 | TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 | 146 | TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 |
| 147 | TexelFetch, /// (MetaTexture, int[N], int) -> float4 | 147 | TexelFetch, /// (MetaTexture, int[N], int) -> float4 |
| 148 | 148 | ||
| 149 | ImageStore, /// (MetaImage, float[N] coords) -> void | ||
| 150 | |||
| 149 | Branch, /// (uint branch_target) -> void | 151 | Branch, /// (uint branch_target) -> void |
| 150 | PushFlowStack, /// (uint branch_target) -> void | 152 | PushFlowStack, /// (uint branch_target) -> void |
| 151 | PopFlowStack, /// () -> void | 153 | PopFlowStack, /// () -> void |
| @@ -263,6 +265,48 @@ private: | |||
| 263 | bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. | 265 | bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. |
| 264 | }; | 266 | }; |
| 265 | 267 | ||
| 268 | class Image { | ||
| 269 | public: | ||
| 270 | explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type) | ||
| 271 | : offset{offset}, index{index}, type{type}, is_bindless{false} {} | ||
| 272 | |||
| 273 | explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index, | ||
| 274 | Tegra::Shader::ImageType type) | ||
| 275 | : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, | ||
| 276 | is_bindless{true} {} | ||
| 277 | |||
| 278 | explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, | ||
| 279 | bool is_bindless) | ||
| 280 | : offset{offset}, index{index}, type{type}, is_bindless{is_bindless} {} | ||
| 281 | |||
| 282 | std::size_t GetOffset() const { | ||
| 283 | return offset; | ||
| 284 | } | ||
| 285 | |||
| 286 | std::size_t GetIndex() const { | ||
| 287 | return index; | ||
| 288 | } | ||
| 289 | |||
| 290 | Tegra::Shader::ImageType GetType() const { | ||
| 291 | return type; | ||
| 292 | } | ||
| 293 | |||
| 294 | bool IsBindless() const { | ||
| 295 | return is_bindless; | ||
| 296 | } | ||
| 297 | |||
| 298 | bool operator<(const Image& rhs) const { | ||
| 299 | return std::tie(offset, index, type, is_bindless) < | ||
| 300 | std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless); | ||
| 301 | } | ||
| 302 | |||
| 303 | private: | ||
| 304 | std::size_t offset{}; | ||
| 305 | std::size_t index{}; | ||
| 306 | Tegra::Shader::ImageType type{}; | ||
| 307 | bool is_bindless{}; | ||
| 308 | }; | ||
| 309 | |||
| 266 | struct GlobalMemoryBase { | 310 | struct GlobalMemoryBase { |
| 267 | u32 cbuf_index{}; | 311 | u32 cbuf_index{}; |
| 268 | u32 cbuf_offset{}; | 312 | u32 cbuf_offset{}; |
| @@ -289,8 +333,14 @@ struct MetaTexture { | |||
| 289 | u32 element{}; | 333 | u32 element{}; |
| 290 | }; | 334 | }; |
| 291 | 335 | ||
| 336 | struct MetaImage { | ||
| 337 | const Image& image; | ||
| 338 | std::vector<Node> values; | ||
| 339 | }; | ||
| 340 | |||
| 292 | /// Parameters that modify an operation but are not part of any particular operand | 341 | /// Parameters that modify an operation but are not part of any particular operand |
| 293 | using Meta = std::variant<MetaArithmetic, MetaTexture, MetaStackClass, Tegra::Shader::HalfType>; | 342 | using Meta = |
| 343 | std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>; | ||
| 294 | 344 | ||
| 295 | /// Holds any kind of operation that can be done in the IR | 345 | /// Holds any kind of operation that can be done in the IR |
| 296 | class OperationNode final { | 346 | class OperationNode final { |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index edcf2288e..e22548208 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -104,6 +104,10 @@ public: | |||
| 104 | return used_samplers; | 104 | return used_samplers; |
| 105 | } | 105 | } |
| 106 | 106 | ||
| 107 | const std::set<Image>& GetImages() const { | ||
| 108 | return used_images; | ||
| 109 | } | ||
| 110 | |||
| 107 | const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances() | 111 | const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances() |
| 108 | const { | 112 | const { |
| 109 | return used_clip_distances; | 113 | return used_clip_distances; |
| @@ -154,6 +158,7 @@ private: | |||
| 154 | u32 DecodeConversion(NodeBlock& bb, u32 pc); | 158 | u32 DecodeConversion(NodeBlock& bb, u32 pc); |
| 155 | u32 DecodeMemory(NodeBlock& bb, u32 pc); | 159 | u32 DecodeMemory(NodeBlock& bb, u32 pc); |
| 156 | u32 DecodeTexture(NodeBlock& bb, u32 pc); | 160 | u32 DecodeTexture(NodeBlock& bb, u32 pc); |
| 161 | u32 DecodeImage(NodeBlock& bb, u32 pc); | ||
| 157 | u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); | 162 | u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); |
| 158 | u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); | 163 | u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); |
| 159 | u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); | 164 | u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); |
| @@ -254,6 +259,12 @@ private: | |||
| 254 | Tegra::Shader::TextureType type, bool is_array, | 259 | Tegra::Shader::TextureType type, bool is_array, |
| 255 | bool is_shadow); | 260 | bool is_shadow); |
| 256 | 261 | ||
| 262 | /// Accesses an image. | ||
| 263 | const Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); | ||
| 264 | |||
| 265 | /// Access a bindless image sampler. | ||
| 266 | const Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); | ||
| 267 | |||
| 257 | /// Extracts a sequence of bits from a node | 268 | /// Extracts a sequence of bits from a node |
| 258 | Node BitfieldExtract(Node value, u32 offset, u32 bits); | 269 | Node BitfieldExtract(Node value, u32 offset, u32 bits); |
| 259 | 270 | ||
| @@ -277,6 +288,8 @@ private: | |||
| 277 | Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 288 | Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 278 | bool depth_compare, bool is_array, bool is_aoffi); | 289 | bool depth_compare, bool is_array, bool is_aoffi); |
| 279 | 290 | ||
| 291 | Node4 GetTldCode(Tegra::Shader::Instruction instr); | ||
| 292 | |||
| 280 | Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 293 | Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 281 | bool is_array); | 294 | bool is_array); |
| 282 | 295 | ||
| @@ -327,6 +340,7 @@ private: | |||
| 327 | std::set<Tegra::Shader::Attribute::Index> used_output_attributes; | 340 | std::set<Tegra::Shader::Attribute::Index> used_output_attributes; |
| 328 | std::map<u32, ConstBuffer> used_cbufs; | 341 | std::map<u32, ConstBuffer> used_cbufs; |
| 329 | std::set<Sampler> used_samplers; | 342 | std::set<Sampler> used_samplers; |
| 343 | std::set<Image> used_images; | ||
| 330 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; | 344 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; |
| 331 | std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; | 345 | std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; |
| 332 | bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes | 346 | bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes |
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 6384fa8d2..c50f6354d 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -12,6 +12,8 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t | |||
| 12 | switch (texture_type) { | 12 | switch (texture_type) { |
| 13 | case Tegra::Texture::TextureType::Texture1D: | 13 | case Tegra::Texture::TextureType::Texture1D: |
| 14 | return SurfaceTarget::Texture1D; | 14 | return SurfaceTarget::Texture1D; |
| 15 | case Tegra::Texture::TextureType::Texture1DBuffer: | ||
| 16 | return SurfaceTarget::TextureBuffer; | ||
| 15 | case Tegra::Texture::TextureType::Texture2D: | 17 | case Tegra::Texture::TextureType::Texture2D: |
| 16 | case Tegra::Texture::TextureType::Texture2DNoMipmap: | 18 | case Tegra::Texture::TextureType::Texture2DNoMipmap: |
| 17 | return SurfaceTarget::Texture2D; | 19 | return SurfaceTarget::Texture2D; |
| @@ -35,6 +37,7 @@ SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_t | |||
| 35 | bool SurfaceTargetIsLayered(SurfaceTarget target) { | 37 | bool SurfaceTargetIsLayered(SurfaceTarget target) { |
| 36 | switch (target) { | 38 | switch (target) { |
| 37 | case SurfaceTarget::Texture1D: | 39 | case SurfaceTarget::Texture1D: |
| 40 | case SurfaceTarget::TextureBuffer: | ||
| 38 | case SurfaceTarget::Texture2D: | 41 | case SurfaceTarget::Texture2D: |
| 39 | case SurfaceTarget::Texture3D: | 42 | case SurfaceTarget::Texture3D: |
| 40 | return false; | 43 | return false; |
| @@ -53,6 +56,7 @@ bool SurfaceTargetIsLayered(SurfaceTarget target) { | |||
| 53 | bool SurfaceTargetIsArray(SurfaceTarget target) { | 56 | bool SurfaceTargetIsArray(SurfaceTarget target) { |
| 54 | switch (target) { | 57 | switch (target) { |
| 55 | case SurfaceTarget::Texture1D: | 58 | case SurfaceTarget::Texture1D: |
| 59 | case SurfaceTarget::TextureBuffer: | ||
| 56 | case SurfaceTarget::Texture2D: | 60 | case SurfaceTarget::Texture2D: |
| 57 | case SurfaceTarget::Texture3D: | 61 | case SurfaceTarget::Texture3D: |
| 58 | case SurfaceTarget::TextureCubemap: | 62 | case SurfaceTarget::TextureCubemap: |
| @@ -304,8 +308,8 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, | |||
| 304 | return PixelFormat::Z32F; | 308 | return PixelFormat::Z32F; |
| 305 | case Tegra::Texture::TextureFormat::Z16: | 309 | case Tegra::Texture::TextureFormat::Z16: |
| 306 | return PixelFormat::Z16; | 310 | return PixelFormat::Z16; |
| 307 | case Tegra::Texture::TextureFormat::Z24S8: | 311 | case Tegra::Texture::TextureFormat::S8Z24: |
| 308 | return PixelFormat::Z24S8; | 312 | return PixelFormat::S8Z24; |
| 309 | case Tegra::Texture::TextureFormat::ZF32_X24S8: | 313 | case Tegra::Texture::TextureFormat::ZF32_X24S8: |
| 310 | return PixelFormat::Z32FS8; | 314 | return PixelFormat::Z32FS8; |
| 311 | case Tegra::Texture::TextureFormat::DXT1: | 315 | case Tegra::Texture::TextureFormat::DXT1: |
diff --git a/src/video_core/surface.h b/src/video_core/surface.h index b783e4b27..83f31c12c 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h | |||
| @@ -114,6 +114,7 @@ enum class SurfaceType { | |||
| 114 | 114 | ||
| 115 | enum class SurfaceTarget { | 115 | enum class SurfaceTarget { |
| 116 | Texture1D, | 116 | Texture1D, |
| 117 | TextureBuffer, | ||
| 117 | Texture2D, | 118 | Texture2D, |
| 118 | Texture3D, | 119 | Texture3D, |
| 119 | Texture1DArray, | 120 | Texture1DArray, |
| @@ -122,71 +123,71 @@ enum class SurfaceTarget { | |||
| 122 | TextureCubeArray, | 123 | TextureCubeArray, |
| 123 | }; | 124 | }; |
| 124 | 125 | ||
| 125 | constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{ | 126 | constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ |
| 126 | 1, // ABGR8U | 127 | 0, // ABGR8U |
| 127 | 1, // ABGR8S | 128 | 0, // ABGR8S |
| 128 | 1, // ABGR8UI | 129 | 0, // ABGR8UI |
| 129 | 1, // B5G6R5U | 130 | 0, // B5G6R5U |
| 130 | 1, // A2B10G10R10U | 131 | 0, // A2B10G10R10U |
| 131 | 1, // A1B5G5R5U | 132 | 0, // A1B5G5R5U |
| 132 | 1, // R8U | 133 | 0, // R8U |
| 133 | 1, // R8UI | 134 | 0, // R8UI |
| 134 | 1, // RGBA16F | 135 | 0, // RGBA16F |
| 135 | 1, // RGBA16U | 136 | 0, // RGBA16U |
| 136 | 1, // RGBA16UI | 137 | 0, // RGBA16UI |
| 137 | 1, // R11FG11FB10F | 138 | 0, // R11FG11FB10F |
| 138 | 1, // RGBA32UI | 139 | 0, // RGBA32UI |
| 139 | 4, // DXT1 | 140 | 2, // DXT1 |
| 140 | 4, // DXT23 | 141 | 2, // DXT23 |
| 141 | 4, // DXT45 | 142 | 2, // DXT45 |
| 142 | 4, // DXN1 | 143 | 2, // DXN1 |
| 143 | 4, // DXN2UNORM | 144 | 2, // DXN2UNORM |
| 144 | 4, // DXN2SNORM | 145 | 2, // DXN2SNORM |
| 145 | 4, // BC7U | 146 | 2, // BC7U |
| 146 | 4, // BC6H_UF16 | 147 | 2, // BC6H_UF16 |
| 147 | 4, // BC6H_SF16 | 148 | 2, // BC6H_SF16 |
| 148 | 4, // ASTC_2D_4X4 | 149 | 2, // ASTC_2D_4X4 |
| 149 | 1, // BGRA8 | 150 | 0, // BGRA8 |
| 150 | 1, // RGBA32F | 151 | 0, // RGBA32F |
| 151 | 1, // RG32F | 152 | 0, // RG32F |
| 152 | 1, // R32F | 153 | 0, // R32F |
| 153 | 1, // R16F | 154 | 0, // R16F |
| 154 | 1, // R16U | 155 | 0, // R16U |
| 155 | 1, // R16S | 156 | 0, // R16S |
| 156 | 1, // R16UI | 157 | 0, // R16UI |
| 157 | 1, // R16I | 158 | 0, // R16I |
| 158 | 1, // RG16 | 159 | 0, // RG16 |
| 159 | 1, // RG16F | 160 | 0, // RG16F |
| 160 | 1, // RG16UI | 161 | 0, // RG16UI |
| 161 | 1, // RG16I | 162 | 0, // RG16I |
| 162 | 1, // RG16S | 163 | 0, // RG16S |
| 163 | 1, // RGB32F | 164 | 0, // RGB32F |
| 164 | 1, // RGBA8_SRGB | 165 | 0, // RGBA8_SRGB |
| 165 | 1, // RG8U | 166 | 0, // RG8U |
| 166 | 1, // RG8S | 167 | 0, // RG8S |
| 167 | 1, // RG32UI | 168 | 0, // RG32UI |
| 168 | 1, // R32UI | 169 | 0, // R32UI |
| 169 | 4, // ASTC_2D_8X8 | 170 | 2, // ASTC_2D_8X8 |
| 170 | 4, // ASTC_2D_8X5 | 171 | 2, // ASTC_2D_8X5 |
| 171 | 4, // ASTC_2D_5X4 | 172 | 2, // ASTC_2D_5X4 |
| 172 | 1, // BGRA8_SRGB | 173 | 0, // BGRA8_SRGB |
| 173 | 4, // DXT1_SRGB | 174 | 2, // DXT1_SRGB |
| 174 | 4, // DXT23_SRGB | 175 | 2, // DXT23_SRGB |
| 175 | 4, // DXT45_SRGB | 176 | 2, // DXT45_SRGB |
| 176 | 4, // BC7U_SRGB | 177 | 2, // BC7U_SRGB |
| 177 | 4, // ASTC_2D_4X4_SRGB | 178 | 2, // ASTC_2D_4X4_SRGB |
| 178 | 4, // ASTC_2D_8X8_SRGB | 179 | 2, // ASTC_2D_8X8_SRGB |
| 179 | 4, // ASTC_2D_8X5_SRGB | 180 | 2, // ASTC_2D_8X5_SRGB |
| 180 | 4, // ASTC_2D_5X4_SRGB | 181 | 2, // ASTC_2D_5X4_SRGB |
| 181 | 4, // ASTC_2D_5X5 | 182 | 2, // ASTC_2D_5X5 |
| 182 | 4, // ASTC_2D_5X5_SRGB | 183 | 2, // ASTC_2D_5X5_SRGB |
| 183 | 4, // ASTC_2D_10X8 | 184 | 2, // ASTC_2D_10X8 |
| 184 | 4, // ASTC_2D_10X8_SRGB | 185 | 2, // ASTC_2D_10X8_SRGB |
| 185 | 1, // Z32F | 186 | 0, // Z32F |
| 186 | 1, // Z16 | 187 | 0, // Z16 |
| 187 | 1, // Z24S8 | 188 | 0, // Z24S8 |
| 188 | 1, // S8Z24 | 189 | 0, // S8Z24 |
| 189 | 1, // Z32FS8 | 190 | 0, // Z32FS8 |
| 190 | }}; | 191 | }}; |
| 191 | 192 | ||
| 192 | /** | 193 | /** |
| @@ -195,12 +196,14 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{ | |||
| 195 | * compressed image. This is used for maintaining proper surface sizes for compressed | 196 | * compressed image. This is used for maintaining proper surface sizes for compressed |
| 196 | * texture formats. | 197 | * texture formats. |
| 197 | */ | 198 | */ |
| 198 | static constexpr u32 GetCompressionFactor(PixelFormat format) { | 199 | inline constexpr u32 GetCompressionFactorShift(PixelFormat format) { |
| 199 | if (format == PixelFormat::Invalid) | 200 | DEBUG_ASSERT(format != PixelFormat::Invalid); |
| 200 | return 0; | 201 | DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_factor_shift_table.size()); |
| 202 | return compression_factor_shift_table[static_cast<std::size_t>(format)]; | ||
| 203 | } | ||
| 201 | 204 | ||
| 202 | ASSERT(static_cast<std::size_t>(format) < compression_factor_table.size()); | 205 | inline constexpr u32 GetCompressionFactor(PixelFormat format) { |
| 203 | return compression_factor_table[static_cast<std::size_t>(format)]; | 206 | return 1U << GetCompressionFactorShift(format); |
| 204 | } | 207 | } |
| 205 | 208 | ||
| 206 | constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ | 209 | constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ |
| @@ -436,6 +439,88 @@ static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { | |||
| 436 | return GetFormatBpp(pixel_format) / CHAR_BIT; | 439 | return GetFormatBpp(pixel_format) / CHAR_BIT; |
| 437 | } | 440 | } |
| 438 | 441 | ||
| 442 | enum class SurfaceCompression { | ||
| 443 | None, // Not compressed | ||
| 444 | Compressed, // Texture is compressed | ||
| 445 | Converted, // Texture is converted before upload or after download | ||
| 446 | Rearranged, // Texture is swizzled before upload or after download | ||
| 447 | }; | ||
| 448 | |||
| 449 | constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table = {{ | ||
| 450 | SurfaceCompression::None, // ABGR8U | ||
| 451 | SurfaceCompression::None, // ABGR8S | ||
| 452 | SurfaceCompression::None, // ABGR8UI | ||
| 453 | SurfaceCompression::None, // B5G6R5U | ||
| 454 | SurfaceCompression::None, // A2B10G10R10U | ||
| 455 | SurfaceCompression::None, // A1B5G5R5U | ||
| 456 | SurfaceCompression::None, // R8U | ||
| 457 | SurfaceCompression::None, // R8UI | ||
| 458 | SurfaceCompression::None, // RGBA16F | ||
| 459 | SurfaceCompression::None, // RGBA16U | ||
| 460 | SurfaceCompression::None, // RGBA16UI | ||
| 461 | SurfaceCompression::None, // R11FG11FB10F | ||
| 462 | SurfaceCompression::None, // RGBA32UI | ||
| 463 | SurfaceCompression::Compressed, // DXT1 | ||
| 464 | SurfaceCompression::Compressed, // DXT23 | ||
| 465 | SurfaceCompression::Compressed, // DXT45 | ||
| 466 | SurfaceCompression::Compressed, // DXN1 | ||
| 467 | SurfaceCompression::Compressed, // DXN2UNORM | ||
| 468 | SurfaceCompression::Compressed, // DXN2SNORM | ||
| 469 | SurfaceCompression::Compressed, // BC7U | ||
| 470 | SurfaceCompression::Compressed, // BC6H_UF16 | ||
| 471 | SurfaceCompression::Compressed, // BC6H_SF16 | ||
| 472 | SurfaceCompression::Converted, // ASTC_2D_4X4 | ||
| 473 | SurfaceCompression::None, // BGRA8 | ||
| 474 | SurfaceCompression::None, // RGBA32F | ||
| 475 | SurfaceCompression::None, // RG32F | ||
| 476 | SurfaceCompression::None, // R32F | ||
| 477 | SurfaceCompression::None, // R16F | ||
| 478 | SurfaceCompression::None, // R16U | ||
| 479 | SurfaceCompression::None, // R16S | ||
| 480 | SurfaceCompression::None, // R16UI | ||
| 481 | SurfaceCompression::None, // R16I | ||
| 482 | SurfaceCompression::None, // RG16 | ||
| 483 | SurfaceCompression::None, // RG16F | ||
| 484 | SurfaceCompression::None, // RG16UI | ||
| 485 | SurfaceCompression::None, // RG16I | ||
| 486 | SurfaceCompression::None, // RG16S | ||
| 487 | SurfaceCompression::None, // RGB32F | ||
| 488 | SurfaceCompression::None, // RGBA8_SRGB | ||
| 489 | SurfaceCompression::None, // RG8U | ||
| 490 | SurfaceCompression::None, // RG8S | ||
| 491 | SurfaceCompression::None, // RG32UI | ||
| 492 | SurfaceCompression::None, // R32UI | ||
| 493 | SurfaceCompression::Converted, // ASTC_2D_8X8 | ||
| 494 | SurfaceCompression::Converted, // ASTC_2D_8X5 | ||
| 495 | SurfaceCompression::Converted, // ASTC_2D_5X4 | ||
| 496 | SurfaceCompression::None, // BGRA8_SRGB | ||
| 497 | SurfaceCompression::Compressed, // DXT1_SRGB | ||
| 498 | SurfaceCompression::Compressed, // DXT23_SRGB | ||
| 499 | SurfaceCompression::Compressed, // DXT45_SRGB | ||
| 500 | SurfaceCompression::Compressed, // BC7U_SRGB | ||
| 501 | SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB | ||
| 502 | SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB | ||
| 503 | SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB | ||
| 504 | SurfaceCompression::Converted, // ASTC_2D_5X4_SRGB | ||
| 505 | SurfaceCompression::Converted, // ASTC_2D_5X5 | ||
| 506 | SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB | ||
| 507 | SurfaceCompression::Converted, // ASTC_2D_10X8 | ||
| 508 | SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB | ||
| 509 | SurfaceCompression::None, // Z32F | ||
| 510 | SurfaceCompression::None, // Z16 | ||
| 511 | SurfaceCompression::None, // Z24S8 | ||
| 512 | SurfaceCompression::Rearranged, // S8Z24 | ||
| 513 | SurfaceCompression::None, // Z32FS8 | ||
| 514 | }}; | ||
| 515 | |||
| 516 | constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) { | ||
| 517 | if (format == PixelFormat::Invalid) { | ||
| 518 | return SurfaceCompression::None; | ||
| 519 | } | ||
| 520 | DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_type_table.size()); | ||
| 521 | return compression_type_table[static_cast<std::size_t>(format)]; | ||
| 522 | } | ||
| 523 | |||
| 439 | SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); | 524 | SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); |
| 440 | 525 | ||
| 441 | bool SurfaceTargetIsLayered(SurfaceTarget target); | 526 | bool SurfaceTargetIsLayered(SurfaceTarget target); |
diff --git a/src/video_core/texture_cache.cpp b/src/video_core/texture_cache.cpp deleted file mode 100644 index e96eba7cc..000000000 --- a/src/video_core/texture_cache.cpp +++ /dev/null | |||
| @@ -1,386 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/alignment.h" | ||
| 6 | #include "common/assert.h" | ||
| 7 | #include "common/cityhash.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "core/core.h" | ||
| 10 | #include "video_core/surface.h" | ||
| 11 | #include "video_core/texture_cache.h" | ||
| 12 | #include "video_core/textures/decoders.h" | ||
| 13 | #include "video_core/textures/texture.h" | ||
| 14 | |||
| 15 | namespace VideoCommon { | ||
| 16 | |||
| 17 | using VideoCore::Surface::SurfaceTarget; | ||
| 18 | |||
| 19 | using VideoCore::Surface::ComponentTypeFromDepthFormat; | ||
| 20 | using VideoCore::Surface::ComponentTypeFromRenderTarget; | ||
| 21 | using VideoCore::Surface::ComponentTypeFromTexture; | ||
| 22 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 23 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 24 | using VideoCore::Surface::PixelFormatFromTextureFormat; | ||
| 25 | using VideoCore::Surface::SurfaceTargetFromTextureType; | ||
| 26 | |||
| 27 | constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { | ||
| 28 | return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); | ||
| 29 | } | ||
| 30 | |||
| 31 | SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, | ||
| 32 | const Tegra::Texture::FullTextureInfo& config) { | ||
| 33 | SurfaceParams params; | ||
| 34 | params.is_tiled = config.tic.IsTiled(); | ||
| 35 | params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0, | ||
| 36 | params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, | ||
| 37 | params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0, | ||
| 38 | params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1; | ||
| 39 | params.pixel_format = | ||
| 40 | PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), false); | ||
| 41 | params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); | ||
| 42 | params.type = GetFormatType(params.pixel_format); | ||
| 43 | params.target = SurfaceTargetFromTextureType(config.tic.texture_type); | ||
| 44 | params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); | ||
| 45 | params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); | ||
| 46 | params.depth = config.tic.Depth(); | ||
| 47 | if (params.target == SurfaceTarget::TextureCubemap || | ||
| 48 | params.target == SurfaceTarget::TextureCubeArray) { | ||
| 49 | params.depth *= 6; | ||
| 50 | } | ||
| 51 | params.pitch = params.is_tiled ? 0 : config.tic.Pitch(); | ||
| 52 | params.unaligned_height = config.tic.Height(); | ||
| 53 | params.num_levels = config.tic.max_mip_level + 1; | ||
| 54 | |||
| 55 | params.CalculateCachedValues(); | ||
| 56 | return params; | ||
| 57 | } | ||
| 58 | |||
| 59 | SurfaceParams SurfaceParams::CreateForDepthBuffer( | ||
| 60 | Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, | ||
| 61 | u32 block_width, u32 block_height, u32 block_depth, | ||
| 62 | Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) { | ||
| 63 | SurfaceParams params; | ||
| 64 | params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; | ||
| 65 | params.block_width = 1 << std::min(block_width, 5U); | ||
| 66 | params.block_height = 1 << std::min(block_height, 5U); | ||
| 67 | params.block_depth = 1 << std::min(block_depth, 5U); | ||
| 68 | params.tile_width_spacing = 1; | ||
| 69 | params.pixel_format = PixelFormatFromDepthFormat(format); | ||
| 70 | params.component_type = ComponentTypeFromDepthFormat(format); | ||
| 71 | params.type = GetFormatType(params.pixel_format); | ||
| 72 | params.width = zeta_width; | ||
| 73 | params.height = zeta_height; | ||
| 74 | params.unaligned_height = zeta_height; | ||
| 75 | params.target = SurfaceTarget::Texture2D; | ||
| 76 | params.depth = 1; | ||
| 77 | params.num_levels = 1; | ||
| 78 | |||
| 79 | params.CalculateCachedValues(); | ||
| 80 | return params; | ||
| 81 | } | ||
| 82 | |||
| 83 | SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) { | ||
| 84 | const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; | ||
| 85 | SurfaceParams params; | ||
| 86 | params.is_tiled = | ||
| 87 | config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; | ||
| 88 | params.block_width = 1 << config.memory_layout.block_width; | ||
| 89 | params.block_height = 1 << config.memory_layout.block_height; | ||
| 90 | params.block_depth = 1 << config.memory_layout.block_depth; | ||
| 91 | params.tile_width_spacing = 1; | ||
| 92 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | ||
| 93 | params.component_type = ComponentTypeFromRenderTarget(config.format); | ||
| 94 | params.type = GetFormatType(params.pixel_format); | ||
| 95 | if (params.is_tiled) { | ||
| 96 | params.width = config.width; | ||
| 97 | } else { | ||
| 98 | const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT; | ||
| 99 | params.pitch = config.width; | ||
| 100 | params.width = params.pitch / bpp; | ||
| 101 | } | ||
| 102 | params.height = config.height; | ||
| 103 | params.depth = 1; | ||
| 104 | params.unaligned_height = config.height; | ||
| 105 | params.target = SurfaceTarget::Texture2D; | ||
| 106 | params.num_levels = 1; | ||
| 107 | |||
| 108 | params.CalculateCachedValues(); | ||
| 109 | return params; | ||
| 110 | } | ||
| 111 | |||
| 112 | SurfaceParams SurfaceParams::CreateForFermiCopySurface( | ||
| 113 | const Tegra::Engines::Fermi2D::Regs::Surface& config) { | ||
| 114 | SurfaceParams params{}; | ||
| 115 | params.is_tiled = !config.linear; | ||
| 116 | params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, | ||
| 117 | params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, | ||
| 118 | params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0, | ||
| 119 | params.tile_width_spacing = 1; | ||
| 120 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | ||
| 121 | params.component_type = ComponentTypeFromRenderTarget(config.format); | ||
| 122 | params.type = GetFormatType(params.pixel_format); | ||
| 123 | params.width = config.width; | ||
| 124 | params.height = config.height; | ||
| 125 | params.unaligned_height = config.height; | ||
| 126 | // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters | ||
| 127 | params.target = SurfaceTarget::Texture2D; | ||
| 128 | params.depth = 1; | ||
| 129 | params.num_levels = 1; | ||
| 130 | |||
| 131 | params.CalculateCachedValues(); | ||
| 132 | return params; | ||
| 133 | } | ||
| 134 | |||
| 135 | u32 SurfaceParams::GetMipWidth(u32 level) const { | ||
| 136 | return std::max(1U, width >> level); | ||
| 137 | } | ||
| 138 | |||
| 139 | u32 SurfaceParams::GetMipHeight(u32 level) const { | ||
| 140 | return std::max(1U, height >> level); | ||
| 141 | } | ||
| 142 | |||
| 143 | u32 SurfaceParams::GetMipDepth(u32 level) const { | ||
| 144 | return IsLayered() ? depth : std::max(1U, depth >> level); | ||
| 145 | } | ||
| 146 | |||
| 147 | bool SurfaceParams::IsLayered() const { | ||
| 148 | switch (target) { | ||
| 149 | case SurfaceTarget::Texture1DArray: | ||
| 150 | case SurfaceTarget::Texture2DArray: | ||
| 151 | case SurfaceTarget::TextureCubeArray: | ||
| 152 | case SurfaceTarget::TextureCubemap: | ||
| 153 | return true; | ||
| 154 | default: | ||
| 155 | return false; | ||
| 156 | } | ||
| 157 | } | ||
| 158 | |||
u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
    // Returns the block-linear block height used for the given mipmap level.
    // Auto block resizing algorithm from:
    // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
    if (level == 0) {
        // Level 0 always uses the surface's configured block height.
        return block_height;
    }
    const u32 height{GetMipHeight(level)};
    const u32 default_block_height{GetDefaultBlockHeight(pixel_format)};
    // Number of compressed-format block rows needed to cover this level's height.
    const u32 blocks_in_y{(height + default_block_height - 1) / default_block_height};
    // NOTE: this local intentionally shadows the block_height member; from here on
    // the heuristic works on the candidate value only. Start at the maximum (16)
    // and halve while the level is small enough to fit in a smaller block.
    u32 block_height = 16;
    while (block_height > 1 && blocks_in_y <= block_height * 4) {
        block_height >>= 1;
    }
    return block_height;
}
| 174 | |||
u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
    // Returns the block-linear block depth used for the given mipmap level.
    if (level == 0)
        // Level 0 always uses the surface's configured block depth.
        return block_depth;
    if (target != SurfaceTarget::Texture3D)
        // Only 3D surfaces have meaningful block depth past level 0.
        return 1;

    const u32 depth{GetMipDepth(level)};
    // NOTE: this local intentionally shadows the block_depth member. Start at the
    // maximum (32) and halve while the level's depth fits in a smaller block.
    u32 block_depth = 32;
    while (block_depth > 1 && depth * 2 <= block_depth) {
        block_depth >>= 1;
    }
    // Cap combined block volume: when depth stays at 32 and the block height for
    // this level is already large, fall back to 16.
    if (block_depth == 32 && GetMipBlockHeight(level) >= 4) {
        return 16;
    }
    return block_depth;
}
| 191 | |||
| 192 | std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { | ||
| 193 | std::size_t offset = 0; | ||
| 194 | for (u32 i = 0; i < level; i++) { | ||
| 195 | offset += GetInnerMipmapMemorySize(i, false, IsLayered(), false); | ||
| 196 | } | ||
| 197 | return offset; | ||
| 198 | } | ||
| 199 | |||
| 200 | std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { | ||
| 201 | std::size_t offset = 0; | ||
| 202 | for (u32 i = 0; i < level; i++) { | ||
| 203 | offset += GetInnerMipmapMemorySize(i, true, false, false); | ||
| 204 | } | ||
| 205 | return offset; | ||
| 206 | } | ||
| 207 | |||
| 208 | std::size_t SurfaceParams::GetGuestLayerSize() const { | ||
| 209 | return GetInnerMemorySize(false, true, false); | ||
| 210 | } | ||
| 211 | |||
| 212 | std::size_t SurfaceParams::GetHostLayerSize(u32 level) const { | ||
| 213 | return GetInnerMipmapMemorySize(level, true, IsLayered(), false); | ||
| 214 | } | ||
| 215 | |||
| 216 | bool SurfaceParams::IsFamiliar(const SurfaceParams& view_params) const { | ||
| 217 | if (std::tie(is_tiled, tile_width_spacing, pixel_format, component_type, type) != | ||
| 218 | std::tie(view_params.is_tiled, view_params.tile_width_spacing, view_params.pixel_format, | ||
| 219 | view_params.component_type, view_params.type)) { | ||
| 220 | return false; | ||
| 221 | } | ||
| 222 | |||
| 223 | const SurfaceTarget view_target{view_params.target}; | ||
| 224 | if (view_target == target) { | ||
| 225 | return true; | ||
| 226 | } | ||
| 227 | |||
| 228 | switch (target) { | ||
| 229 | case SurfaceTarget::Texture1D: | ||
| 230 | case SurfaceTarget::Texture2D: | ||
| 231 | case SurfaceTarget::Texture3D: | ||
| 232 | return false; | ||
| 233 | case SurfaceTarget::Texture1DArray: | ||
| 234 | return view_target == SurfaceTarget::Texture1D; | ||
| 235 | case SurfaceTarget::Texture2DArray: | ||
| 236 | return view_target == SurfaceTarget::Texture2D; | ||
| 237 | case SurfaceTarget::TextureCubemap: | ||
| 238 | return view_target == SurfaceTarget::Texture2D || | ||
| 239 | view_target == SurfaceTarget::Texture2DArray; | ||
| 240 | case SurfaceTarget::TextureCubeArray: | ||
| 241 | return view_target == SurfaceTarget::Texture2D || | ||
| 242 | view_target == SurfaceTarget::Texture2DArray || | ||
| 243 | view_target == SurfaceTarget::TextureCubemap; | ||
| 244 | default: | ||
| 245 | UNIMPLEMENTED_MSG("Unimplemented texture family={}", static_cast<u32>(target)); | ||
| 246 | return false; | ||
| 247 | } | ||
| 248 | } | ||
| 249 | |||
| 250 | bool SurfaceParams::IsPixelFormatZeta() const { | ||
| 251 | return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && | ||
| 252 | pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; | ||
| 253 | } | ||
| 254 | |||
| 255 | void SurfaceParams::CalculateCachedValues() { | ||
| 256 | guest_size_in_bytes = GetInnerMemorySize(false, false, false); | ||
| 257 | |||
| 258 | // ASTC is uncompressed in software, in emulated as RGBA8 | ||
| 259 | if (IsPixelFormatASTC(pixel_format)) { | ||
| 260 | host_size_in_bytes = width * height * depth * 4; | ||
| 261 | } else { | ||
| 262 | host_size_in_bytes = GetInnerMemorySize(true, false, false); | ||
| 263 | } | ||
| 264 | |||
| 265 | switch (target) { | ||
| 266 | case SurfaceTarget::Texture1D: | ||
| 267 | case SurfaceTarget::Texture2D: | ||
| 268 | case SurfaceTarget::Texture3D: | ||
| 269 | num_layers = 1; | ||
| 270 | break; | ||
| 271 | case SurfaceTarget::Texture1DArray: | ||
| 272 | case SurfaceTarget::Texture2DArray: | ||
| 273 | case SurfaceTarget::TextureCubemap: | ||
| 274 | case SurfaceTarget::TextureCubeArray: | ||
| 275 | num_layers = depth; | ||
| 276 | break; | ||
| 277 | default: | ||
| 278 | UNREACHABLE(); | ||
| 279 | } | ||
| 280 | } | ||
| 281 | |||
| 282 | std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only, | ||
| 283 | bool uncompressed) const { | ||
| 284 | const bool tiled{as_host_size ? false : is_tiled}; | ||
| 285 | const u32 tile_x{GetDefaultBlockWidth(pixel_format)}; | ||
| 286 | const u32 tile_y{GetDefaultBlockHeight(pixel_format)}; | ||
| 287 | const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), tile_x)}; | ||
| 288 | const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), tile_y)}; | ||
| 289 | const u32 depth{layer_only ? 1U : GetMipDepth(level)}; | ||
| 290 | return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(pixel_format), width, height, | ||
| 291 | depth, GetMipBlockHeight(level), GetMipBlockDepth(level)); | ||
| 292 | } | ||
| 293 | |||
| 294 | std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only, | ||
| 295 | bool uncompressed) const { | ||
| 296 | std::size_t size = 0; | ||
| 297 | for (u32 level = 0; level < num_levels; ++level) { | ||
| 298 | size += GetInnerMipmapMemorySize(level, as_host_size, layer_only, uncompressed); | ||
| 299 | } | ||
| 300 | if (!as_host_size && is_tiled) { | ||
| 301 | size = Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); | ||
| 302 | } | ||
| 303 | return size; | ||
| 304 | } | ||
| 305 | |||
| 306 | std::map<u64, std::pair<u32, u32>> SurfaceParams::CreateViewOffsetMap() const { | ||
| 307 | std::map<u64, std::pair<u32, u32>> view_offset_map; | ||
| 308 | switch (target) { | ||
| 309 | case SurfaceTarget::Texture1D: | ||
| 310 | case SurfaceTarget::Texture2D: | ||
| 311 | case SurfaceTarget::Texture3D: { | ||
| 312 | constexpr u32 layer = 0; | ||
| 313 | for (u32 level = 0; level < num_levels; ++level) { | ||
| 314 | const std::size_t offset{GetGuestMipmapLevelOffset(level)}; | ||
| 315 | view_offset_map.insert({offset, {layer, level}}); | ||
| 316 | } | ||
| 317 | break; | ||
| 318 | } | ||
| 319 | case SurfaceTarget::Texture1DArray: | ||
| 320 | case SurfaceTarget::Texture2DArray: | ||
| 321 | case SurfaceTarget::TextureCubemap: | ||
| 322 | case SurfaceTarget::TextureCubeArray: { | ||
| 323 | const std::size_t layer_size{GetGuestLayerSize()}; | ||
| 324 | for (u32 level = 0; level < num_levels; ++level) { | ||
| 325 | const std::size_t level_offset{GetGuestMipmapLevelOffset(level)}; | ||
| 326 | for (u32 layer = 0; layer < num_layers; ++layer) { | ||
| 327 | const auto layer_offset{static_cast<std::size_t>(layer_size * layer)}; | ||
| 328 | const std::size_t offset{level_offset + layer_offset}; | ||
| 329 | view_offset_map.insert({offset, {layer, level}}); | ||
| 330 | } | ||
| 331 | } | ||
| 332 | break; | ||
| 333 | } | ||
| 334 | default: | ||
| 335 | UNIMPLEMENTED_MSG("Unimplemented surface target {}", static_cast<u32>(target)); | ||
| 336 | } | ||
| 337 | return view_offset_map; | ||
| 338 | } | ||
| 339 | |||
| 340 | bool SurfaceParams::IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const { | ||
| 341 | return IsDimensionValid(view_params, level) && IsDepthValid(view_params, level) && | ||
| 342 | IsInBounds(view_params, layer, level); | ||
| 343 | } | ||
| 344 | |||
| 345 | bool SurfaceParams::IsDimensionValid(const SurfaceParams& view_params, u32 level) const { | ||
| 346 | return view_params.width == GetMipWidth(level) && view_params.height == GetMipHeight(level); | ||
| 347 | } | ||
| 348 | |||
| 349 | bool SurfaceParams::IsDepthValid(const SurfaceParams& view_params, u32 level) const { | ||
| 350 | if (view_params.target != SurfaceTarget::Texture3D) { | ||
| 351 | return true; | ||
| 352 | } | ||
| 353 | return view_params.depth == GetMipDepth(level); | ||
| 354 | } | ||
| 355 | |||
| 356 | bool SurfaceParams::IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const { | ||
| 357 | return layer + view_params.num_layers <= num_layers && | ||
| 358 | level + view_params.num_levels <= num_levels; | ||
| 359 | } | ||
| 360 | |||
std::size_t HasheableSurfaceParams::Hash() const {
    // Hashes the raw object representation (every member, including any padding
    // bytes), so all members must be deterministically initialized before use.
    return static_cast<std::size_t>(
        Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
}
| 365 | |||
| 366 | bool HasheableSurfaceParams::operator==(const HasheableSurfaceParams& rhs) const { | ||
| 367 | return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width, | ||
| 368 | height, depth, pitch, unaligned_height, num_levels, pixel_format, | ||
| 369 | component_type, type, target) == | ||
| 370 | std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth, | ||
| 371 | rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch, | ||
| 372 | rhs.unaligned_height, rhs.num_levels, rhs.pixel_format, rhs.component_type, | ||
| 373 | rhs.type, rhs.target); | ||
| 374 | } | ||
| 375 | |||
std::size_t ViewKey::Hash() const {
    // Hashes the raw object representation; ViewKey's members are all value-
    // initialized u32s, so the byte image is deterministic.
    return static_cast<std::size_t>(
        Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
}
| 380 | |||
| 381 | bool ViewKey::operator==(const ViewKey& rhs) const { | ||
| 382 | return std::tie(base_layer, num_layers, base_level, num_levels) == | ||
| 383 | std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels); | ||
| 384 | } | ||
| 385 | |||
| 386 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache.h b/src/video_core/texture_cache.h deleted file mode 100644 index 041551691..000000000 --- a/src/video_core/texture_cache.h +++ /dev/null | |||
| @@ -1,586 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <list> | ||
| 8 | #include <memory> | ||
| 9 | #include <set> | ||
| 10 | #include <tuple> | ||
| 11 | #include <type_traits> | ||
| 12 | #include <unordered_map> | ||
| 13 | |||
| 14 | #include <boost/icl/interval_map.hpp> | ||
| 15 | #include <boost/range/iterator_range.hpp> | ||
| 16 | |||
| 17 | #include "common/assert.h" | ||
| 18 | #include "common/common_types.h" | ||
| 19 | #include "core/memory.h" | ||
| 20 | #include "video_core/engines/fermi_2d.h" | ||
| 21 | #include "video_core/engines/maxwell_3d.h" | ||
| 22 | #include "video_core/gpu.h" | ||
| 23 | #include "video_core/rasterizer_interface.h" | ||
| 24 | #include "video_core/surface.h" | ||
| 25 | |||
| 26 | namespace Core { | ||
| 27 | class System; | ||
| 28 | } | ||
| 29 | |||
| 30 | namespace Tegra::Texture { | ||
| 31 | struct FullTextureInfo; | ||
| 32 | } | ||
| 33 | |||
| 34 | namespace VideoCore { | ||
| 35 | class RasterizerInterface; | ||
| 36 | } | ||
| 37 | |||
| 38 | namespace VideoCommon { | ||
| 39 | |||
/// Surface-identifying parameters. Hash() digests the raw bytes of this object,
/// so the member order and layout here are load-bearing: do not reorder members,
/// and make sure creators initialize every member deterministically.
class HasheableSurfaceParams {
public:
    /// Hashes the raw byte representation of this object with CityHash64.
    std::size_t Hash() const;

    /// Member-wise equality over all surface parameters.
    bool operator==(const HasheableSurfaceParams& rhs) const;

protected:
    // Avoid creation outside of a managed environment.
    HasheableSurfaceParams() = default;

    bool is_tiled;        // True for block-linear surfaces, false for pitch-linear
    u32 block_width;      // Block-linear block dimensions (meaningful when tiled)
    u32 block_height;
    u32 block_depth;
    u32 tile_width_spacing;
    u32 width;            // Level-0 dimensions in texels
    u32 height;
    u32 depth;
    u32 pitch;            // Row stride for linear surfaces; creators set 0 when tiled
    u32 unaligned_height; // Height as provided by the guest, before any alignment
    u32 num_levels;       // Number of mipmap levels
    VideoCore::Surface::PixelFormat pixel_format;
    VideoCore::Surface::ComponentType component_type;
    VideoCore::Surface::SurfaceType type;
    VideoCore::Surface::SurfaceTarget target;
};
| 66 | |||
/// Full surface description: the hashable parameters plus values derived from
/// them (cached by CalculateCachedValues()). Instances are built exclusively
/// through the static Create* factories below.
class SurfaceParams final : public HasheableSurfaceParams {
public:
    /// Creates SurfaceCachedParams from a texture configuration.
    static SurfaceParams CreateForTexture(Core::System& system,
                                          const Tegra::Texture::FullTextureInfo& config);

    /// Creates SurfaceCachedParams for a depth buffer configuration.
    static SurfaceParams CreateForDepthBuffer(
        Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
        u32 block_width, u32 block_height, u32 block_depth,
        Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);

    /// Creates SurfaceCachedParams from a framebuffer configuration.
    static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);

    /// Creates SurfaceCachedParams from a Fermi2D surface configuration.
    static SurfaceParams CreateForFermiCopySurface(
        const Tegra::Engines::Fermi2D::Regs::Surface& config);

    // Trivial accessors over the protected base-class members.

    bool IsTiled() const {
        return is_tiled;
    }

    u32 GetBlockWidth() const {
        return block_width;
    }

    u32 GetTileWidthSpacing() const {
        return tile_width_spacing;
    }

    u32 GetWidth() const {
        return width;
    }

    u32 GetHeight() const {
        return height;
    }

    u32 GetDepth() const {
        return depth;
    }

    u32 GetPitch() const {
        return pitch;
    }

    u32 GetNumLevels() const {
        return num_levels;
    }

    VideoCore::Surface::PixelFormat GetPixelFormat() const {
        return pixel_format;
    }

    VideoCore::Surface::ComponentType GetComponentType() const {
        return component_type;
    }

    VideoCore::Surface::SurfaceTarget GetTarget() const {
        return target;
    }

    VideoCore::Surface::SurfaceType GetType() const {
        return type;
    }

    // Accessors over the values cached by CalculateCachedValues().

    std::size_t GetGuestSizeInBytes() const {
        return guest_size_in_bytes;
    }

    std::size_t GetHostSizeInBytes() const {
        return host_size_in_bytes;
    }

    u32 GetNumLayers() const {
        return num_layers;
    }

    /// Returns the width of a given mipmap level.
    u32 GetMipWidth(u32 level) const;

    /// Returns the height of a given mipmap level.
    u32 GetMipHeight(u32 level) const;

    /// Returns the depth of a given mipmap level.
    u32 GetMipDepth(u32 level) const;

    /// Returns true if these parameters are from a layered surface.
    bool IsLayered() const;

    /// Returns the block height of a given mipmap level.
    u32 GetMipBlockHeight(u32 level) const;

    /// Returns the block depth of a given mipmap level.
    u32 GetMipBlockDepth(u32 level) const;

    /// Returns the offset in bytes in guest memory of a given mipmap level.
    std::size_t GetGuestMipmapLevelOffset(u32 level) const;

    /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
    std::size_t GetHostMipmapLevelOffset(u32 level) const;

    /// Returns the size of a layer in bytes in guest memory.
    std::size_t GetGuestLayerSize() const;

    /// Returns the size of a layer in bytes in host memory for a given mipmap level.
    std::size_t GetHostLayerSize(u32 level) const;

    /// Returns true if another surface can be familiar with this. This is a loosely defined term
    /// that reflects the possibility of these two surface parameters potentially being part of a
    /// bigger superset.
    bool IsFamiliar(const SurfaceParams& view_params) const;

    /// Returns true if the pixel format is a depth and/or stencil format.
    bool IsPixelFormatZeta() const;

    /// Creates a map that redirects an address difference to a layer and mipmap level.
    std::map<u64, std::pair<u32, u32>> CreateViewOffsetMap() const;

    /// Returns true if the passed surface view parameters is equal or a valid subset of this.
    bool IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const;

private:
    /// Calculates values that can be deduced from HasheableSurfaceParams.
    void CalculateCachedValues();

    /// Returns the size of a given mipmap level.
    std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only,
                                         bool uncompressed) const;

    /// Returns the size of all mipmap levels and aligns as needed.
    std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const;

    /// Returns true if the passed view width and height match the size of this params in a given
    /// mipmap level.
    bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const;

    /// Returns true if the passed view depth match the size of this params in a given mipmap level.
    bool IsDepthValid(const SurfaceParams& view_params, u32 level) const;

    /// Returns true if the passed view layers and mipmap levels are in bounds.
    bool IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const;

    // Cached values, filled in by CalculateCachedValues().
    std::size_t guest_size_in_bytes; // Total size in guest memory (tiled, aligned)
    std::size_t host_size_in_bytes;  // Total size in host memory (linear)
    u32 num_layers;                  // 1 for flat targets, depth for layered targets
};
| 215 | |||
/// Identifies a view inside a surface by its layer/mipmap window. Hash() digests
/// the raw bytes of this struct, hence all members are value-initialized so the
/// byte image is deterministic.
struct ViewKey {
    std::size_t Hash() const;

    bool operator==(const ViewKey& rhs) const;

    u32 base_layer{};  // First array layer of the view
    u32 num_layers{};  // Number of array layers spanned
    u32 base_level{};  // First mipmap level of the view
    u32 num_levels{};  // Number of mipmap levels spanned
};
| 226 | |||
| 227 | } // namespace VideoCommon | ||
| 228 | |||
namespace std {

/// Allows SurfaceParams to be used as an unordered container key by forwarding
/// to its CityHash-based Hash() member.
template <>
struct hash<VideoCommon::SurfaceParams> {
    std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
        return k.Hash();
    }
};

/// Allows ViewKey to be used as an unordered container key by forwarding to its
/// Hash() member.
template <>
struct hash<VideoCommon::ViewKey> {
    std::size_t operator()(const VideoCommon::ViewKey& k) const noexcept {
        return k.Hash();
    }
};

} // namespace std
| 246 | |||
| 247 | namespace VideoCommon { | ||
| 248 | |||
/// Base class for backend-specific surfaces. Owns the surface parameters, the
/// offset->(layer, level) map used to resolve views, and a cache of created
/// views. Backends implement the pure virtual load/flush/upload hooks.
template <typename TView, typename TExecutionContext>
class SurfaceBase {
    static_assert(std::is_trivially_copyable_v<TExecutionContext>);

public:
    /// Reads the surface's guest memory into an internal staging buffer.
    virtual void LoadBuffer() = 0;

    /// Writes the surface's contents back to guest memory.
    virtual TExecutionContext FlushBuffer(TExecutionContext exctx) = 0;

    /// Uploads the staging buffer contents to the host texture.
    virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0;

    /// Returns a view matching view_addr/view_params, or null when no aligned,
    /// familiar and in-bounds view exists at that address.
    TView* TryGetView(VAddr view_addr, const SurfaceParams& view_params) {
        if (view_addr < cpu_addr || !params.IsFamiliar(view_params)) {
            // It can't be a view if it's in a prior address.
            return {};
        }

        // view_offset_map maps relative byte offsets to (layer, level) pairs.
        const auto relative_offset{static_cast<u64>(view_addr - cpu_addr)};
        const auto it{view_offset_map.find(relative_offset)};
        if (it == view_offset_map.end()) {
            // Couldn't find an aligned view.
            return {};
        }
        const auto [layer, level] = it->second;

        if (!params.IsViewValid(view_params, layer, level)) {
            return {};
        }

        return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels());
    }

    // The address accessors below assert registration: addresses are only
    // meaningful between Register() and Unregister().

    VAddr GetCpuAddr() const {
        ASSERT(is_registered);
        return cpu_addr;
    }

    u8* GetHostPtr() const {
        ASSERT(is_registered);
        return host_ptr;
    }

    CacheAddr GetCacheAddr() const {
        ASSERT(is_registered);
        return cache_addr;
    }

    /// Size of the surface in guest memory bytes.
    std::size_t GetSizeInBytes() const {
        return params.GetGuestSizeInBytes();
    }

    void MarkAsModified(bool is_modified_) {
        is_modified = is_modified_;
    }

    const SurfaceParams& GetSurfaceParams() const {
        return params;
    }

    /// Like TryGetView, but asserts that a view was found.
    TView* GetView(VAddr view_addr, const SurfaceParams& view_params) {
        TView* view{TryGetView(view_addr, view_params)};
        ASSERT(view != nullptr);
        return view;
    }

    /// Binds the surface to a guest address range. Must not already be registered.
    void Register(VAddr cpu_addr_, u8* host_ptr_) {
        ASSERT(!is_registered);
        is_registered = true;
        cpu_addr = cpu_addr_;
        host_ptr = host_ptr_;
        cache_addr = ToCacheAddr(host_ptr_);
    }

    /// Convenience overload resolving the host pointer from guest memory.
    void Register(VAddr cpu_addr_) {
        Register(cpu_addr_, Memory::GetPointer(cpu_addr_));
    }

    void Unregister() {
        ASSERT(is_registered);
        is_registered = false;
    }

    bool IsRegistered() const {
        return is_registered;
    }

protected:
    explicit SurfaceBase(const SurfaceParams& params)
        : params{params}, view_offset_map{params.CreateViewOffsetMap()} {}

    // Non-virtual: deletion happens through the concrete backend type.
    ~SurfaceBase() = default;

    /// Backend hook that materializes a view for the given key.
    virtual std::unique_ptr<TView> CreateView(const ViewKey& view_key) = 0;

    bool IsModified() const {
        return is_modified;
    }

    const SurfaceParams params;

private:
    /// Returns the cached view for the window, creating it on a cache miss.
    TView* GetView(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels) {
        const ViewKey key{base_layer, num_layers, base_level, num_levels};
        const auto [entry, is_cache_miss] = views.try_emplace(key);
        auto& view{entry->second};
        if (is_cache_miss) {
            view = CreateView(key);
        }
        return view.get();
    }

    // Immutable after construction; built from params.
    const std::map<u64, std::pair<u32, u32>> view_offset_map;

    VAddr cpu_addr{};
    u8* host_ptr{};
    CacheAddr cache_addr{};
    bool is_modified{};
    bool is_registered{};
    std::unordered_map<ViewKey, std::unique_ptr<TView>> views;
};
| 369 | |||
| 370 | template <typename TSurface, typename TView, typename TExecutionContext> | ||
| 371 | class TextureCache { | ||
| 372 | static_assert(std::is_trivially_copyable_v<TExecutionContext>); | ||
| 373 | using ResultType = std::tuple<TView*, TExecutionContext>; | ||
| 374 | using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface*>>; | ||
| 375 | using IntervalType = typename IntervalMap::interval_type; | ||
| 376 | |||
| 377 | public: | ||
| 378 | void InvalidateRegion(CacheAddr addr, std::size_t size) { | ||
| 379 | for (TSurface* surface : GetSurfacesInRegion(addr, size)) { | ||
| 380 | if (!surface->IsRegistered()) { | ||
| 381 | // Skip duplicates | ||
| 382 | continue; | ||
| 383 | } | ||
| 384 | Unregister(surface); | ||
| 385 | } | ||
| 386 | } | ||
| 387 | |||
| 388 | ResultType GetTextureSurface(TExecutionContext exctx, | ||
| 389 | const Tegra::Texture::FullTextureInfo& config) { | ||
| 390 | auto& memory_manager{system.GPU().MemoryManager()}; | ||
| 391 | const auto cpu_addr{memory_manager.GpuToCpuAddress(config.tic.Address())}; | ||
| 392 | if (!cpu_addr) { | ||
| 393 | return {{}, exctx}; | ||
| 394 | } | ||
| 395 | const auto params{SurfaceParams::CreateForTexture(system, config)}; | ||
| 396 | return GetSurfaceView(exctx, *cpu_addr, params, true); | ||
| 397 | } | ||
| 398 | |||
| 399 | ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) { | ||
| 400 | const auto& regs{system.GPU().Maxwell3D().regs}; | ||
| 401 | if (!regs.zeta.Address() || !regs.zeta_enable) { | ||
| 402 | return {{}, exctx}; | ||
| 403 | } | ||
| 404 | |||
| 405 | auto& memory_manager{system.GPU().MemoryManager()}; | ||
| 406 | const auto cpu_addr{memory_manager.GpuToCpuAddress(regs.zeta.Address())}; | ||
| 407 | if (!cpu_addr) { | ||
| 408 | return {{}, exctx}; | ||
| 409 | } | ||
| 410 | |||
| 411 | const auto depth_params{SurfaceParams::CreateForDepthBuffer( | ||
| 412 | system, regs.zeta_width, regs.zeta_height, regs.zeta.format, | ||
| 413 | regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, | ||
| 414 | regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; | ||
| 415 | return GetSurfaceView(exctx, *cpu_addr, depth_params, preserve_contents); | ||
| 416 | } | ||
| 417 | |||
| 418 | ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index, | ||
| 419 | bool preserve_contents) { | ||
| 420 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | ||
| 421 | |||
| 422 | const auto& regs{system.GPU().Maxwell3D().regs}; | ||
| 423 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || | ||
| 424 | regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { | ||
| 425 | return {{}, exctx}; | ||
| 426 | } | ||
| 427 | |||
| 428 | auto& memory_manager{system.GPU().MemoryManager()}; | ||
| 429 | const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; | ||
| 430 | const auto cpu_addr{memory_manager.GpuToCpuAddress( | ||
| 431 | config.Address() + config.base_layer * config.layer_stride * sizeof(u32))}; | ||
| 432 | if (!cpu_addr) { | ||
| 433 | return {{}, exctx}; | ||
| 434 | } | ||
| 435 | |||
| 436 | return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index), | ||
| 437 | preserve_contents); | ||
| 438 | } | ||
| 439 | |||
| 440 | ResultType GetFermiSurface(TExecutionContext exctx, | ||
| 441 | const Tegra::Engines::Fermi2D::Regs::Surface& config) { | ||
| 442 | const auto cpu_addr{system.GPU().MemoryManager().GpuToCpuAddress(config.Address())}; | ||
| 443 | ASSERT(cpu_addr); | ||
| 444 | return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFermiCopySurface(config), | ||
| 445 | true); | ||
| 446 | } | ||
| 447 | |||
| 448 | TSurface* TryFindFramebufferSurface(const u8* host_ptr) const { | ||
| 449 | const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))}; | ||
| 450 | return it != registered_surfaces.end() ? *it->second.begin() : nullptr; | ||
| 451 | } | ||
| 452 | |||
| 453 | protected: | ||
| 454 | TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) | ||
| 455 | : system{system}, rasterizer{rasterizer} {} | ||
| 456 | |||
| 457 | ~TextureCache() = default; | ||
| 458 | |||
| 459 | virtual ResultType TryFastGetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr, | ||
| 460 | const SurfaceParams& params, bool preserve_contents, | ||
| 461 | const std::vector<TSurface*>& overlaps) = 0; | ||
| 462 | |||
| 463 | virtual std::unique_ptr<TSurface> CreateSurface(const SurfaceParams& params) = 0; | ||
| 464 | |||
| 465 | void Register(TSurface* surface, VAddr cpu_addr, u8* host_ptr) { | ||
| 466 | surface->Register(cpu_addr, host_ptr); | ||
| 467 | registered_surfaces.add({GetSurfaceInterval(surface), {surface}}); | ||
| 468 | rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1); | ||
| 469 | } | ||
| 470 | |||
| 471 | void Unregister(TSurface* surface) { | ||
| 472 | registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}}); | ||
| 473 | rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1); | ||
| 474 | surface->Unregister(); | ||
| 475 | } | ||
| 476 | |||
| 477 | TSurface* GetUncachedSurface(const SurfaceParams& params) { | ||
| 478 | if (TSurface* surface = TryGetReservedSurface(params); surface) | ||
| 479 | return surface; | ||
| 480 | // No reserved surface available, create a new one and reserve it | ||
| 481 | auto new_surface{CreateSurface(params)}; | ||
| 482 | TSurface* surface{new_surface.get()}; | ||
| 483 | ReserveSurface(params, std::move(new_surface)); | ||
| 484 | return surface; | ||
| 485 | } | ||
| 486 | |||
| 487 | Core::System& system; | ||
| 488 | |||
| 489 | private: | ||
| 490 | ResultType GetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, const SurfaceParams& params, | ||
| 491 | bool preserve_contents) { | ||
| 492 | const auto host_ptr{Memory::GetPointer(cpu_addr)}; | ||
| 493 | const auto cache_addr{ToCacheAddr(host_ptr)}; | ||
| 494 | const auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())}; | ||
| 495 | if (overlaps.empty()) { | ||
| 496 | return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents); | ||
| 497 | } | ||
| 498 | |||
| 499 | if (overlaps.size() == 1) { | ||
| 500 | if (TView* view = overlaps[0]->TryGetView(cpu_addr, params); view) | ||
| 501 | return {view, exctx}; | ||
| 502 | } | ||
| 503 | |||
| 504 | TView* fast_view; | ||
| 505 | std::tie(fast_view, exctx) = | ||
| 506 | TryFastGetSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents, overlaps); | ||
| 507 | |||
| 508 | for (TSurface* surface : overlaps) { | ||
| 509 | if (!fast_view) { | ||
| 510 | // Flush even when we don't care about the contents, to preserve memory not written | ||
| 511 | // by the new surface. | ||
| 512 | exctx = surface->FlushBuffer(exctx); | ||
| 513 | } | ||
| 514 | Unregister(surface); | ||
| 515 | } | ||
| 516 | |||
| 517 | if (fast_view) { | ||
| 518 | return {fast_view, exctx}; | ||
| 519 | } | ||
| 520 | |||
| 521 | return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents); | ||
| 522 | } | ||
| 523 | |||
| 524 | ResultType LoadSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr, | ||
| 525 | const SurfaceParams& params, bool preserve_contents) { | ||
| 526 | TSurface* new_surface{GetUncachedSurface(params)}; | ||
| 527 | Register(new_surface, cpu_addr, host_ptr); | ||
| 528 | if (preserve_contents) { | ||
| 529 | exctx = LoadSurface(exctx, new_surface); | ||
| 530 | } | ||
| 531 | return {new_surface->GetView(cpu_addr, params), exctx}; | ||
| 532 | } | ||
| 533 | |||
| 534 | TExecutionContext LoadSurface(TExecutionContext exctx, TSurface* surface) { | ||
| 535 | surface->LoadBuffer(); | ||
| 536 | exctx = surface->UploadTexture(exctx); | ||
| 537 | surface->MarkAsModified(false); | ||
| 538 | return exctx; | ||
| 539 | } | ||
| 540 | |||
| 541 | std::vector<TSurface*> GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const { | ||
| 542 | if (size == 0) { | ||
| 543 | return {}; | ||
| 544 | } | ||
| 545 | const IntervalType interval{cache_addr, cache_addr + size}; | ||
| 546 | |||
| 547 | std::vector<TSurface*> surfaces; | ||
| 548 | for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) { | ||
| 549 | surfaces.push_back(*pair.second.begin()); | ||
| 550 | } | ||
| 551 | return surfaces; | ||
| 552 | } | ||
| 553 | |||
| 554 | void ReserveSurface(const SurfaceParams& params, std::unique_ptr<TSurface> surface) { | ||
| 555 | surface_reserve[params].push_back(std::move(surface)); | ||
| 556 | } | ||
| 557 | |||
| 558 | TSurface* TryGetReservedSurface(const SurfaceParams& params) { | ||
| 559 | auto search{surface_reserve.find(params)}; | ||
| 560 | if (search == surface_reserve.end()) { | ||
| 561 | return {}; | ||
| 562 | } | ||
| 563 | for (auto& surface : search->second) { | ||
| 564 | if (!surface->IsRegistered()) { | ||
| 565 | return surface.get(); | ||
| 566 | } | ||
| 567 | } | ||
| 568 | return {}; | ||
| 569 | } | ||
| 570 | |||
| 571 | IntervalType GetSurfaceInterval(TSurface* surface) const { | ||
| 572 | return IntervalType::right_open(surface->GetCacheAddr(), | ||
| 573 | surface->GetCacheAddr() + surface->GetSizeInBytes()); | ||
| 574 | } | ||
| 575 | |||
| 576 | VideoCore::RasterizerInterface& rasterizer; | ||
| 577 | |||
| 578 | IntervalMap registered_surfaces; | ||
| 579 | |||
| 580 | /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have | ||
| 581 | /// previously been used. This is to prevent surfaces from being constantly created and | ||
| 582 | /// destroyed when used with different surface parameters. | ||
| 583 | std::unordered_map<SurfaceParams, std::list<std::unique_ptr<TSurface>>> surface_reserve; | ||
| 584 | }; | ||
| 585 | |||
| 586 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h new file mode 100644 index 000000000..9c21a0649 --- /dev/null +++ b/src/video_core/texture_cache/copy_params.h | |||
| @@ -0,0 +1,36 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace VideoCommon { | ||
| 10 | |||
| 11 | struct CopyParams { | ||
| 12 | constexpr CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y, | ||
| 13 | u32 dest_z, u32 source_level, u32 dest_level, u32 width, u32 height, | ||
| 14 | u32 depth) | ||
| 15 | : source_x{source_x}, source_y{source_y}, source_z{source_z}, dest_x{dest_x}, | ||
| 16 | dest_y{dest_y}, dest_z{dest_z}, source_level{source_level}, | ||
| 17 | dest_level{dest_level}, width{width}, height{height}, depth{depth} {} | ||
| 18 | |||
| 19 | constexpr CopyParams(u32 width, u32 height, u32 depth, u32 level) | ||
| 20 | : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level}, | ||
| 21 | dest_level{level}, width{width}, height{height}, depth{depth} {} | ||
| 22 | |||
| 23 | u32 source_x; | ||
| 24 | u32 source_y; | ||
| 25 | u32 source_z; | ||
| 26 | u32 dest_x; | ||
| 27 | u32 dest_y; | ||
| 28 | u32 dest_z; | ||
| 29 | u32 source_level; | ||
| 30 | u32 dest_level; | ||
| 31 | u32 width; | ||
| 32 | u32 height; | ||
| 33 | u32 depth; | ||
| 34 | }; | ||
| 35 | |||
| 36 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp new file mode 100644 index 000000000..7a0fdb19b --- /dev/null +++ b/src/video_core/texture_cache/surface_base.cpp | |||
| @@ -0,0 +1,300 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/microprofile.h" | ||
| 8 | #include "video_core/memory_manager.h" | ||
| 9 | #include "video_core/texture_cache/surface_base.h" | ||
| 10 | #include "video_core/texture_cache/surface_params.h" | ||
| 11 | #include "video_core/textures/convert.h" | ||
| 12 | |||
| 13 | namespace VideoCommon { | ||
| 14 | |||
| 15 | MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128)); | ||
| 16 | MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128)); | ||
| 17 | |||
| 18 | using Tegra::Texture::ConvertFromGuestToHost; | ||
| 19 | using VideoCore::MortonSwizzleMode; | ||
| 20 | using VideoCore::Surface::SurfaceCompression; | ||
| 21 | |||
| 22 | StagingCache::StagingCache() = default; | ||
| 23 | |||
| 24 | StagingCache::~StagingCache() = default; | ||
| 25 | |||
| 26 | SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) | ||
| 27 | : params{params}, mipmap_sizes(params.num_levels), | ||
| 28 | mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ | ||
| 29 | params.GetHostSizeInBytes()} { | ||
| 30 | std::size_t offset = 0; | ||
| 31 | for (u32 level = 0; level < params.num_levels; ++level) { | ||
| 32 | const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; | ||
| 33 | mipmap_sizes[level] = mipmap_size; | ||
| 34 | mipmap_offsets[level] = offset; | ||
| 35 | offset += mipmap_size; | ||
| 36 | } | ||
| 37 | layer_size = offset; | ||
| 38 | if (params.is_layered) { | ||
| 39 | if (params.is_tiled) { | ||
| 40 | layer_size = | ||
| 41 | SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth); | ||
| 42 | } | ||
| 43 | guest_memory_size = layer_size * params.depth; | ||
| 44 | } else { | ||
| 45 | guest_memory_size = layer_size; | ||
| 46 | } | ||
| 47 | } | ||
| 48 | |||
| 49 | MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const { | ||
| 50 | const u32 src_bpp{params.GetBytesPerPixel()}; | ||
| 51 | const u32 dst_bpp{rhs.GetBytesPerPixel()}; | ||
| 52 | const bool ib1 = params.IsBuffer(); | ||
| 53 | const bool ib2 = rhs.IsBuffer(); | ||
| 54 | if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) { | ||
| 55 | const bool cb1 = params.IsCompressed(); | ||
| 56 | const bool cb2 = rhs.IsCompressed(); | ||
| 57 | if (cb1 == cb2) { | ||
| 58 | return MatchTopologyResult::FullMatch; | ||
| 59 | } | ||
| 60 | return MatchTopologyResult::CompressUnmatch; | ||
| 61 | } | ||
| 62 | return MatchTopologyResult::None; | ||
| 63 | } | ||
| 64 | |||
| 65 | MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const { | ||
| 66 | // Buffer surface Check | ||
| 67 | if (params.IsBuffer()) { | ||
| 68 | const std::size_t wd1 = params.width * params.GetBytesPerPixel(); | ||
| 69 | const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel(); | ||
| 70 | if (wd1 == wd2) { | ||
| 71 | return MatchStructureResult::FullMatch; | ||
| 72 | } | ||
| 73 | return MatchStructureResult::None; | ||
| 74 | } | ||
| 75 | |||
| 76 | // Linear Surface check | ||
| 77 | if (!params.is_tiled) { | ||
| 78 | if (std::tie(params.width, params.height, params.pitch) == | ||
| 79 | std::tie(rhs.width, rhs.height, rhs.pitch)) { | ||
| 80 | return MatchStructureResult::FullMatch; | ||
| 81 | } | ||
| 82 | return MatchStructureResult::None; | ||
| 83 | } | ||
| 84 | |||
| 85 | // Tiled Surface check | ||
| 86 | if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth, | ||
| 87 | params.tile_width_spacing, params.num_levels) == | ||
| 88 | std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth, | ||
| 89 | rhs.tile_width_spacing, rhs.num_levels)) { | ||
| 90 | if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) { | ||
| 91 | return MatchStructureResult::FullMatch; | ||
| 92 | } | ||
| 93 | const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format, | ||
| 94 | rhs.pixel_format); | ||
| 95 | const u32 hs = | ||
| 96 | SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format); | ||
| 97 | const u32 w1 = params.GetBlockAlignedWidth(); | ||
| 98 | if (std::tie(w1, params.height) == std::tie(ws, hs)) { | ||
| 99 | return MatchStructureResult::SemiMatch; | ||
| 100 | } | ||
| 101 | } | ||
| 102 | return MatchStructureResult::None; | ||
| 103 | } | ||
| 104 | |||
| 105 | std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap( | ||
| 106 | const GPUVAddr candidate_gpu_addr) const { | ||
| 107 | if (gpu_addr == candidate_gpu_addr) { | ||
| 108 | return {{0, 0}}; | ||
| 109 | } | ||
| 110 | if (candidate_gpu_addr < gpu_addr) { | ||
| 111 | return {}; | ||
| 112 | } | ||
| 113 | const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)}; | ||
| 114 | const auto layer{static_cast<u32>(relative_address / layer_size)}; | ||
| 115 | const GPUVAddr mipmap_address = relative_address - layer_size * layer; | ||
| 116 | const auto mipmap_it = | ||
| 117 | Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); | ||
| 118 | if (mipmap_it == mipmap_offsets.end()) { | ||
| 119 | return {}; | ||
| 120 | } | ||
| 121 | const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))}; | ||
| 122 | return std::make_pair(layer, level); | ||
| 123 | } | ||
| 124 | |||
| 125 | std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const { | ||
| 126 | const u32 layers{params.depth}; | ||
| 127 | const u32 mipmaps{params.num_levels}; | ||
| 128 | std::vector<CopyParams> result; | ||
| 129 | result.reserve(static_cast<std::size_t>(layers) * static_cast<std::size_t>(mipmaps)); | ||
| 130 | |||
| 131 | for (u32 layer = 0; layer < layers; layer++) { | ||
| 132 | for (u32 level = 0; level < mipmaps; level++) { | ||
| 133 | const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); | ||
| 134 | const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); | ||
| 135 | result.emplace_back(width, height, layer, level); | ||
| 136 | } | ||
| 137 | } | ||
| 138 | return result; | ||
| 139 | } | ||
| 140 | |||
| 141 | std::vector<CopyParams> SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const { | ||
| 142 | const u32 mipmaps{params.num_levels}; | ||
| 143 | std::vector<CopyParams> result; | ||
| 144 | result.reserve(mipmaps); | ||
| 145 | |||
| 146 | for (u32 level = 0; level < mipmaps; level++) { | ||
| 147 | const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); | ||
| 148 | const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); | ||
| 149 | const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; | ||
| 150 | result.emplace_back(width, height, depth, level); | ||
| 151 | } | ||
| 152 | return result; | ||
| 153 | } | ||
| 154 | |||
| 155 | void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, | ||
| 156 | u8* buffer, u32 level) { | ||
| 157 | const u32 width{params.GetMipWidth(level)}; | ||
| 158 | const u32 height{params.GetMipHeight(level)}; | ||
| 159 | const u32 block_height{params.GetMipBlockHeight(level)}; | ||
| 160 | const u32 block_depth{params.GetMipBlockDepth(level)}; | ||
| 161 | |||
| 162 | std::size_t guest_offset{mipmap_offsets[level]}; | ||
| 163 | if (params.is_layered) { | ||
| 164 | std::size_t host_offset{0}; | ||
| 165 | const std::size_t guest_stride = layer_size; | ||
| 166 | const std::size_t host_stride = params.GetHostLayerSize(level); | ||
| 167 | for (u32 layer = 0; layer < params.depth; ++layer) { | ||
| 168 | MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, 1, | ||
| 169 | params.tile_width_spacing, buffer + host_offset, memory + guest_offset); | ||
| 170 | guest_offset += guest_stride; | ||
| 171 | host_offset += host_stride; | ||
| 172 | } | ||
| 173 | } else { | ||
| 174 | MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, | ||
| 175 | params.GetMipDepth(level), params.tile_width_spacing, buffer, | ||
| 176 | memory + guest_offset); | ||
| 177 | } | ||
| 178 | } | ||
| 179 | |||
| 180 | void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, | ||
| 181 | StagingCache& staging_cache) { | ||
| 182 | MICROPROFILE_SCOPE(GPU_Load_Texture); | ||
| 183 | auto& staging_buffer = staging_cache.GetBuffer(0); | ||
| 184 | u8* host_ptr; | ||
| 185 | is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size); | ||
| 186 | |||
| 187 | // Handle continuouty | ||
| 188 | if (is_continuous) { | ||
| 189 | // Use physical memory directly | ||
| 190 | host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 191 | if (!host_ptr) { | ||
| 192 | return; | ||
| 193 | } | ||
| 194 | } else { | ||
| 195 | // Use an extra temporal buffer | ||
| 196 | auto& tmp_buffer = staging_cache.GetBuffer(1); | ||
| 197 | tmp_buffer.resize(guest_memory_size); | ||
| 198 | host_ptr = tmp_buffer.data(); | ||
| 199 | memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); | ||
| 200 | } | ||
| 201 | |||
| 202 | if (params.is_tiled) { | ||
| 203 | ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", | ||
| 204 | params.block_width, static_cast<u32>(params.target)); | ||
| 205 | for (u32 level = 0; level < params.num_levels; ++level) { | ||
| 206 | const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; | ||
| 207 | SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, | ||
| 208 | staging_buffer.data() + host_offset, level); | ||
| 209 | } | ||
| 210 | } else { | ||
| 211 | ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented"); | ||
| 212 | const u32 bpp{params.GetBytesPerPixel()}; | ||
| 213 | const u32 block_width{params.GetDefaultBlockWidth()}; | ||
| 214 | const u32 block_height{params.GetDefaultBlockHeight()}; | ||
| 215 | const u32 width{(params.width + block_width - 1) / block_width}; | ||
| 216 | const u32 height{(params.height + block_height - 1) / block_height}; | ||
| 217 | const u32 copy_size{width * bpp}; | ||
| 218 | if (params.pitch == copy_size) { | ||
| 219 | std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes()); | ||
| 220 | } else { | ||
| 221 | const u8* start{host_ptr}; | ||
| 222 | u8* write_to{staging_buffer.data()}; | ||
| 223 | for (u32 h = height; h > 0; --h) { | ||
| 224 | std::memcpy(write_to, start, copy_size); | ||
| 225 | start += params.pitch; | ||
| 226 | write_to += copy_size; | ||
| 227 | } | ||
| 228 | } | ||
| 229 | } | ||
| 230 | |||
| 231 | auto compression_type = params.GetCompressionType(); | ||
| 232 | if (compression_type == SurfaceCompression::None || | ||
| 233 | compression_type == SurfaceCompression::Compressed) | ||
| 234 | return; | ||
| 235 | |||
| 236 | for (u32 level_up = params.num_levels; level_up > 0; --level_up) { | ||
| 237 | const u32 level = level_up - 1; | ||
| 238 | const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level)}; | ||
| 239 | const std::size_t out_host_offset = compression_type == SurfaceCompression::Rearranged | ||
| 240 | ? in_host_offset | ||
| 241 | : params.GetConvertedMipmapOffset(level); | ||
| 242 | u8* in_buffer = staging_buffer.data() + in_host_offset; | ||
| 243 | u8* out_buffer = staging_buffer.data() + out_host_offset; | ||
| 244 | ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format, | ||
| 245 | params.GetMipWidth(level), params.GetMipHeight(level), | ||
| 246 | params.GetMipDepth(level), true, true); | ||
| 247 | } | ||
| 248 | } | ||
| 249 | |||
| 250 | void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, | ||
| 251 | StagingCache& staging_cache) { | ||
| 252 | MICROPROFILE_SCOPE(GPU_Flush_Texture); | ||
| 253 | auto& staging_buffer = staging_cache.GetBuffer(0); | ||
| 254 | u8* host_ptr; | ||
| 255 | |||
| 256 | // Handle continuouty | ||
| 257 | if (is_continuous) { | ||
| 258 | // Use physical memory directly | ||
| 259 | host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 260 | if (!host_ptr) { | ||
| 261 | return; | ||
| 262 | } | ||
| 263 | } else { | ||
| 264 | // Use an extra temporal buffer | ||
| 265 | auto& tmp_buffer = staging_cache.GetBuffer(1); | ||
| 266 | tmp_buffer.resize(guest_memory_size); | ||
| 267 | host_ptr = tmp_buffer.data(); | ||
| 268 | } | ||
| 269 | |||
| 270 | if (params.is_tiled) { | ||
| 271 | ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); | ||
| 272 | for (u32 level = 0; level < params.num_levels; ++level) { | ||
| 273 | const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; | ||
| 274 | SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, | ||
| 275 | staging_buffer.data() + host_offset, level); | ||
| 276 | } | ||
| 277 | } else { | ||
| 278 | ASSERT(params.target == SurfaceTarget::Texture2D); | ||
| 279 | ASSERT(params.num_levels == 1); | ||
| 280 | |||
| 281 | const u32 bpp{params.GetBytesPerPixel()}; | ||
| 282 | const u32 copy_size{params.width * bpp}; | ||
| 283 | if (params.pitch == copy_size) { | ||
| 284 | std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size); | ||
| 285 | } else { | ||
| 286 | u8* start{host_ptr}; | ||
| 287 | const u8* read_to{staging_buffer.data()}; | ||
| 288 | for (u32 h = params.height; h > 0; --h) { | ||
| 289 | std::memcpy(start, read_to, copy_size); | ||
| 290 | start += params.pitch; | ||
| 291 | read_to += copy_size; | ||
| 292 | } | ||
| 293 | } | ||
| 294 | } | ||
| 295 | if (!is_continuous) { | ||
| 296 | memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); | ||
| 297 | } | ||
| 298 | } | ||
| 299 | |||
| 300 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h new file mode 100644 index 000000000..8ba386a8a --- /dev/null +++ b/src/video_core/texture_cache/surface_base.h | |||
| @@ -0,0 +1,317 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <unordered_map> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "common/assert.h" | ||
| 12 | #include "common/binary_find.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/gpu.h" | ||
| 15 | #include "video_core/morton.h" | ||
| 16 | #include "video_core/texture_cache/copy_params.h" | ||
| 17 | #include "video_core/texture_cache/surface_params.h" | ||
| 18 | #include "video_core/texture_cache/surface_view.h" | ||
| 19 | |||
| 20 | namespace Tegra { | ||
| 21 | class MemoryManager; | ||
| 22 | } | ||
| 23 | |||
| 24 | namespace VideoCommon { | ||
| 25 | |||
| 26 | using VideoCore::MortonSwizzleMode; | ||
| 27 | using VideoCore::Surface::SurfaceTarget; | ||
| 28 | |||
| 29 | enum class MatchStructureResult : u32 { | ||
| 30 | FullMatch = 0, | ||
| 31 | SemiMatch = 1, | ||
| 32 | None = 2, | ||
| 33 | }; | ||
| 34 | |||
| 35 | enum class MatchTopologyResult : u32 { | ||
| 36 | FullMatch = 0, | ||
| 37 | CompressUnmatch = 1, | ||
| 38 | None = 2, | ||
| 39 | }; | ||
| 40 | |||
| 41 | class StagingCache { | ||
| 42 | public: | ||
| 43 | explicit StagingCache(); | ||
| 44 | ~StagingCache(); | ||
| 45 | |||
| 46 | std::vector<u8>& GetBuffer(std::size_t index) { | ||
| 47 | return staging_buffer[index]; | ||
| 48 | } | ||
| 49 | |||
| 50 | const std::vector<u8>& GetBuffer(std::size_t index) const { | ||
| 51 | return staging_buffer[index]; | ||
| 52 | } | ||
| 53 | |||
| 54 | void SetSize(std::size_t size) { | ||
| 55 | staging_buffer.resize(size); | ||
| 56 | } | ||
| 57 | |||
| 58 | private: | ||
| 59 | std::vector<std::vector<u8>> staging_buffer; | ||
| 60 | }; | ||
| 61 | |||
| 62 | class SurfaceBaseImpl { | ||
| 63 | public: | ||
| 64 | void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); | ||
| 65 | |||
| 66 | void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); | ||
| 67 | |||
| 68 | GPUVAddr GetGpuAddr() const { | ||
| 69 | return gpu_addr; | ||
| 70 | } | ||
| 71 | |||
| 72 | bool Overlaps(const CacheAddr start, const CacheAddr end) const { | ||
| 73 | return (cache_addr < end) && (cache_addr_end > start); | ||
| 74 | } | ||
| 75 | |||
| 76 | bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { | ||
| 77 | const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size; | ||
| 78 | return (gpu_addr <= other_start && other_end <= gpu_addr_end); | ||
| 79 | } | ||
| 80 | |||
| 81 | // Use only when recycling a surface | ||
| 82 | void SetGpuAddr(const GPUVAddr new_addr) { | ||
| 83 | gpu_addr = new_addr; | ||
| 84 | } | ||
| 85 | |||
| 86 | VAddr GetCpuAddr() const { | ||
| 87 | return cpu_addr; | ||
| 88 | } | ||
| 89 | |||
| 90 | void SetCpuAddr(const VAddr new_addr) { | ||
| 91 | cpu_addr = new_addr; | ||
| 92 | } | ||
| 93 | |||
| 94 | CacheAddr GetCacheAddr() const { | ||
| 95 | return cache_addr; | ||
| 96 | } | ||
| 97 | |||
| 98 | CacheAddr GetCacheAddrEnd() const { | ||
| 99 | return cache_addr_end; | ||
| 100 | } | ||
| 101 | |||
| 102 | void SetCacheAddr(const CacheAddr new_addr) { | ||
| 103 | cache_addr = new_addr; | ||
| 104 | cache_addr_end = new_addr + guest_memory_size; | ||
| 105 | } | ||
| 106 | |||
| 107 | const SurfaceParams& GetSurfaceParams() const { | ||
| 108 | return params; | ||
| 109 | } | ||
| 110 | |||
| 111 | std::size_t GetSizeInBytes() const { | ||
| 112 | return guest_memory_size; | ||
| 113 | } | ||
| 114 | |||
| 115 | std::size_t GetHostSizeInBytes() const { | ||
| 116 | return host_memory_size; | ||
| 117 | } | ||
| 118 | |||
| 119 | std::size_t GetMipmapSize(const u32 level) const { | ||
| 120 | return mipmap_sizes[level]; | ||
| 121 | } | ||
| 122 | |||
| 123 | void MarkAsContinuous(const bool is_continuous) { | ||
| 124 | this->is_continuous = is_continuous; | ||
| 125 | } | ||
| 126 | |||
| 127 | bool IsContinuous() const { | ||
| 128 | return is_continuous; | ||
| 129 | } | ||
| 130 | |||
| 131 | bool IsLinear() const { | ||
| 132 | return !params.is_tiled; | ||
| 133 | } | ||
| 134 | |||
| 135 | bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { | ||
| 136 | return params.pixel_format == pixel_format; | ||
| 137 | } | ||
| 138 | |||
| 139 | VideoCore::Surface::PixelFormat GetFormat() const { | ||
| 140 | return params.pixel_format; | ||
| 141 | } | ||
| 142 | |||
| 143 | bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const { | ||
| 144 | return params.target == target; | ||
| 145 | } | ||
| 146 | |||
| 147 | MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const; | ||
| 148 | |||
| 149 | MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const; | ||
| 150 | |||
| 151 | bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const { | ||
| 152 | return std::tie(gpu_addr, params.target, params.num_levels) == | ||
| 153 | std::tie(other_gpu_addr, rhs.target, rhs.num_levels) && | ||
| 154 | params.target == SurfaceTarget::Texture2D && params.num_levels == 1; | ||
| 155 | } | ||
| 156 | |||
| 157 | std::optional<std::pair<u32, u32>> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const; | ||
| 158 | |||
| 159 | std::vector<CopyParams> BreakDown(const SurfaceParams& in_params) const { | ||
| 160 | return params.is_layered ? BreakDownLayered(in_params) : BreakDownNonLayered(in_params); | ||
| 161 | } | ||
| 162 | |||
| 163 | protected: | ||
| 164 | explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params); | ||
| 165 | ~SurfaceBaseImpl() = default; | ||
| 166 | |||
| 167 | virtual void DecorateSurfaceName() = 0; | ||
| 168 | |||
| 169 | const SurfaceParams params; | ||
| 170 | std::size_t layer_size; | ||
| 171 | std::size_t guest_memory_size; | ||
| 172 | const std::size_t host_memory_size; | ||
| 173 | GPUVAddr gpu_addr{}; | ||
| 174 | CacheAddr cache_addr{}; | ||
| 175 | CacheAddr cache_addr_end{}; | ||
| 176 | VAddr cpu_addr{}; | ||
| 177 | bool is_continuous{}; | ||
| 178 | |||
| 179 | std::vector<std::size_t> mipmap_sizes; | ||
| 180 | std::vector<std::size_t> mipmap_offsets; | ||
| 181 | |||
| 182 | private: | ||
| 183 | void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, | ||
| 184 | u32 level); | ||
| 185 | |||
| 186 | std::vector<CopyParams> BreakDownLayered(const SurfaceParams& in_params) const; | ||
| 187 | |||
| 188 | std::vector<CopyParams> BreakDownNonLayered(const SurfaceParams& in_params) const; | ||
| 189 | }; | ||
| 190 | |||
| 191 | template <typename TView> | ||
| 192 | class SurfaceBase : public SurfaceBaseImpl { | ||
| 193 | public: | ||
| 194 | virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0; | ||
| 195 | |||
| 196 | virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0; | ||
| 197 | |||
| 198 | void MarkAsModified(const bool is_modified_, const u64 tick) { | ||
| 199 | is_modified = is_modified_ || is_target; | ||
| 200 | modification_tick = tick; | ||
| 201 | } | ||
| 202 | |||
| 203 | void MarkAsRenderTarget(const bool is_target) { | ||
| 204 | this->is_target = is_target; | ||
| 205 | } | ||
| 206 | |||
| 207 | void MarkAsPicked(const bool is_picked) { | ||
| 208 | this->is_picked = is_picked; | ||
| 209 | } | ||
| 210 | |||
| 211 | bool IsModified() const { | ||
| 212 | return is_modified; | ||
| 213 | } | ||
| 214 | |||
| 215 | bool IsProtected() const { | ||
| 216 | // Only 3D Slices are to be protected | ||
| 217 | return is_target && params.block_depth > 0; | ||
| 218 | } | ||
| 219 | |||
| 220 | bool IsRenderTarget() const { | ||
| 221 | return is_target; | ||
| 222 | } | ||
| 223 | |||
| 224 | bool IsRegistered() const { | ||
| 225 | return is_registered; | ||
| 226 | } | ||
| 227 | |||
| 228 | bool IsPicked() const { | ||
| 229 | return is_picked; | ||
| 230 | } | ||
| 231 | |||
| 232 | void MarkAsRegistered(bool is_reg) { | ||
| 233 | is_registered = is_reg; | ||
| 234 | } | ||
| 235 | |||
| 236 | u64 GetModificationTick() const { | ||
| 237 | return modification_tick; | ||
| 238 | } | ||
| 239 | |||
| 240 | TView EmplaceOverview(const SurfaceParams& overview_params) { | ||
| 241 | const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth}; | ||
| 242 | return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); | ||
| 243 | } | ||
| 244 | |||
| 245 | std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params, | ||
| 246 | const GPUVAddr view_addr, | ||
| 247 | const std::size_t candidate_size, const u32 mipmap, | ||
| 248 | const u32 layer) { | ||
| 249 | const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)}; | ||
| 250 | if (!layer_mipmap) { | ||
| 251 | return {}; | ||
| 252 | } | ||
| 253 | const u32 end_layer{layer_mipmap->first}; | ||
| 254 | const u32 end_mipmap{layer_mipmap->second}; | ||
| 255 | if (layer != end_layer) { | ||
| 256 | if (mipmap == 0 && end_mipmap == 0) { | ||
| 257 | return GetView(ViewParams(view_params.target, layer, end_layer - layer + 1, 0, 1)); | ||
| 258 | } | ||
| 259 | return {}; | ||
| 260 | } else { | ||
| 261 | return GetView( | ||
| 262 | ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap + 1)); | ||
| 263 | } | ||
| 264 | } | ||
| 265 | |||
| 266 | std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, | ||
| 267 | const std::size_t candidate_size) { | ||
| 268 | if (params.target == SurfaceTarget::Texture3D || | ||
| 269 | (params.num_levels == 1 && !params.is_layered) || | ||
| 270 | view_params.target == SurfaceTarget::Texture3D) { | ||
| 271 | return {}; | ||
| 272 | } | ||
| 273 | const auto layer_mipmap{GetLayerMipmap(view_addr)}; | ||
| 274 | if (!layer_mipmap) { | ||
| 275 | return {}; | ||
| 276 | } | ||
| 277 | const u32 layer{layer_mipmap->first}; | ||
| 278 | const u32 mipmap{layer_mipmap->second}; | ||
| 279 | if (GetMipmapSize(mipmap) != candidate_size) { | ||
| 280 | return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer); | ||
| 281 | } | ||
| 282 | return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1)); | ||
| 283 | } | ||
| 284 | |||
    /// Returns the main view of the surface (stored in main_view).
    TView GetMainView() const {
        return main_view;
    }
| 288 | |||
protected:
    explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params)
        : SurfaceBaseImpl(gpu_addr, params) {}

    ~SurfaceBase() = default;

    /// Backend-specific view factory; implemented by the renderer's surface type.
    virtual TView CreateView(const ViewParams& view_key) = 0;

    TView main_view; // View returned by GetMainView
    std::unordered_map<ViewParams, TView> views; // Cache of views created over this surface
| 300 | private: | ||
| 301 | TView GetView(const ViewParams& key) { | ||
| 302 | const auto [entry, is_cache_miss] = views.try_emplace(key); | ||
| 303 | auto& view{entry->second}; | ||
| 304 | if (is_cache_miss) { | ||
| 305 | view = CreateView(key); | ||
| 306 | } | ||
| 307 | return view; | ||
| 308 | } | ||
| 309 | |||
    bool is_modified{};
    bool is_target{};
    bool is_registered{};    // Toggled through MarkAsRegistered
    bool is_picked{};        // Queried through IsPicked
    u64 modification_tick{}; // Returned by GetModificationTick
| 315 | }; | ||
| 316 | |||
| 317 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp new file mode 100644 index 000000000..9c56e2b4f --- /dev/null +++ b/src/video_core/texture_cache/surface_params.cpp | |||
| @@ -0,0 +1,334 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <map> | ||
| 6 | |||
| 7 | #include "common/alignment.h" | ||
| 8 | #include "common/bit_util.h" | ||
| 9 | #include "core/core.h" | ||
| 10 | #include "video_core/engines/shader_bytecode.h" | ||
| 11 | #include "video_core/surface.h" | ||
| 12 | #include "video_core/texture_cache/surface_params.h" | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | using VideoCore::Surface::ComponentTypeFromDepthFormat; | ||
| 17 | using VideoCore::Surface::ComponentTypeFromRenderTarget; | ||
| 18 | using VideoCore::Surface::ComponentTypeFromTexture; | ||
| 19 | using VideoCore::Surface::PixelFormat; | ||
| 20 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 21 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 22 | using VideoCore::Surface::PixelFormatFromTextureFormat; | ||
| 23 | using VideoCore::Surface::SurfaceTarget; | ||
| 24 | using VideoCore::Surface::SurfaceTargetFromTextureType; | ||
| 25 | using VideoCore::Surface::SurfaceType; | ||
| 26 | |||
| 27 | SurfaceTarget TextureType2SurfaceTarget(Tegra::Shader::TextureType type, bool is_array) { | ||
| 28 | switch (type) { | ||
| 29 | case Tegra::Shader::TextureType::Texture1D: { | ||
| 30 | if (is_array) | ||
| 31 | return SurfaceTarget::Texture1DArray; | ||
| 32 | else | ||
| 33 | return SurfaceTarget::Texture1D; | ||
| 34 | } | ||
| 35 | case Tegra::Shader::TextureType::Texture2D: { | ||
| 36 | if (is_array) | ||
| 37 | return SurfaceTarget::Texture2DArray; | ||
| 38 | else | ||
| 39 | return SurfaceTarget::Texture2D; | ||
| 40 | } | ||
| 41 | case Tegra::Shader::TextureType::Texture3D: { | ||
| 42 | ASSERT(!is_array); | ||
| 43 | return SurfaceTarget::Texture3D; | ||
| 44 | } | ||
| 45 | case Tegra::Shader::TextureType::TextureCube: { | ||
| 46 | if (is_array) | ||
| 47 | return SurfaceTarget::TextureCubeArray; | ||
| 48 | else | ||
| 49 | return SurfaceTarget::TextureCubemap; | ||
| 50 | } | ||
| 51 | default: { | ||
| 52 | UNREACHABLE(); | ||
| 53 | return SurfaceTarget::Texture2D; | ||
| 54 | } | ||
| 55 | } | ||
| 56 | } | ||
| 57 | |||
| 58 | namespace { | ||
| 59 | constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { | ||
| 60 | return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); | ||
| 61 | } | ||
| 62 | } // Anonymous namespace | ||
| 63 | |||
| 64 | SurfaceParams SurfaceParams::CreateForTexture(Core::System& system, | ||
| 65 | const Tegra::Texture::FullTextureInfo& config, | ||
| 66 | const VideoCommon::Shader::Sampler& entry) { | ||
| 67 | SurfaceParams params; | ||
| 68 | params.is_tiled = config.tic.IsTiled(); | ||
| 69 | params.srgb_conversion = config.tic.IsSrgbConversionEnabled(); | ||
| 70 | params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0, | ||
| 71 | params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, | ||
| 72 | params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0, | ||
| 73 | params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1; | ||
| 74 | params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), | ||
| 75 | params.srgb_conversion); | ||
| 76 | params.type = GetFormatType(params.pixel_format); | ||
| 77 | if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) { | ||
| 78 | switch (params.pixel_format) { | ||
| 79 | case PixelFormat::R16U: | ||
| 80 | case PixelFormat::R16F: { | ||
| 81 | params.pixel_format = PixelFormat::Z16; | ||
| 82 | break; | ||
| 83 | } | ||
| 84 | case PixelFormat::R32F: { | ||
| 85 | params.pixel_format = PixelFormat::Z32F; | ||
| 86 | break; | ||
| 87 | } | ||
| 88 | default: { | ||
| 89 | UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", | ||
| 90 | static_cast<u32>(params.pixel_format)); | ||
| 91 | } | ||
| 92 | } | ||
| 93 | params.type = GetFormatType(params.pixel_format); | ||
| 94 | } | ||
| 95 | params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); | ||
| 96 | params.type = GetFormatType(params.pixel_format); | ||
| 97 | // TODO: on 1DBuffer we should use the tic info. | ||
| 98 | if (!config.tic.IsBuffer()) { | ||
| 99 | params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray()); | ||
| 100 | params.width = config.tic.Width(); | ||
| 101 | params.height = config.tic.Height(); | ||
| 102 | params.depth = config.tic.Depth(); | ||
| 103 | params.pitch = params.is_tiled ? 0 : config.tic.Pitch(); | ||
| 104 | if (params.target == SurfaceTarget::TextureCubemap || | ||
| 105 | params.target == SurfaceTarget::TextureCubeArray) { | ||
| 106 | params.depth *= 6; | ||
| 107 | } | ||
| 108 | params.num_levels = config.tic.max_mip_level + 1; | ||
| 109 | params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap()); | ||
| 110 | params.is_layered = params.IsLayered(); | ||
| 111 | } else { | ||
| 112 | params.target = SurfaceTarget::TextureBuffer; | ||
| 113 | params.width = config.tic.Width(); | ||
| 114 | params.pitch = params.width * params.GetBytesPerPixel(); | ||
| 115 | params.height = 1; | ||
| 116 | params.depth = 1; | ||
| 117 | params.num_levels = 1; | ||
| 118 | params.emulated_levels = 1; | ||
| 119 | params.is_layered = false; | ||
| 120 | } | ||
| 121 | return params; | ||
| 122 | } | ||
| 123 | |||
/// Builds surface parameters for a depth/stencil (zeta) buffer configuration.
SurfaceParams SurfaceParams::CreateForDepthBuffer(
    Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
    u32 block_width, u32 block_height, u32 block_depth,
    Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
    SurfaceParams params;
    params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
    params.srgb_conversion = false;
    // Block dimensions (log2 values) are clamped to at most 5.
    params.block_width = std::min(block_width, 5U);
    params.block_height = std::min(block_height, 5U);
    params.block_depth = std::min(block_depth, 5U);
    params.tile_width_spacing = 1;
    params.pixel_format = PixelFormatFromDepthFormat(format);
    params.component_type = ComponentTypeFromDepthFormat(format);
    params.type = GetFormatType(params.pixel_format);
    params.width = zeta_width;
    params.height = zeta_height;
    // Depth buffers are always treated as single-level 2D surfaces.
    params.target = SurfaceTarget::Texture2D;
    params.depth = 1;
    params.pitch = 0;
    params.num_levels = 1;
    params.emulated_levels = 1;
    params.is_layered = false;
    return params;
}
| 148 | |||
/// Builds surface parameters from the render target at the given index of the 3D engine.
SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) {
    const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
    SurfaceParams params;
    params.is_tiled =
        config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
    params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
                             config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
    params.block_width = config.memory_layout.block_width;
    params.block_height = config.memory_layout.block_height;
    params.block_depth = config.memory_layout.block_depth;
    params.tile_width_spacing = 1;
    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
    params.component_type = ComponentTypeFromRenderTarget(config.format);
    params.type = GetFormatType(params.pixel_format);
    if (params.is_tiled) {
        params.pitch = 0;
        params.width = config.width;
    } else {
        // For pitch-linear render targets, config.width holds the pitch in bytes;
        // derive the width in pixels from it.
        const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT;
        params.pitch = config.width;
        params.width = params.pitch / bpp;
    }
    params.height = config.height;
    params.depth = 1;
    params.target = SurfaceTarget::Texture2D;
    params.num_levels = 1;
    params.emulated_levels = 1;
    params.is_layered = false;
    return params;
}
| 179 | |||
| 180 | SurfaceParams SurfaceParams::CreateForFermiCopySurface( | ||
| 181 | const Tegra::Engines::Fermi2D::Regs::Surface& config) { | ||
| 182 | SurfaceParams params{}; | ||
| 183 | params.is_tiled = !config.linear; | ||
| 184 | params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || | ||
| 185 | config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; | ||
| 186 | params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 5U) : 0, | ||
| 187 | params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 5U) : 0, | ||
| 188 | params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0, | ||
| 189 | params.tile_width_spacing = 1; | ||
| 190 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | ||
| 191 | params.component_type = ComponentTypeFromRenderTarget(config.format); | ||
| 192 | params.type = GetFormatType(params.pixel_format); | ||
| 193 | params.width = config.width; | ||
| 194 | params.height = config.height; | ||
| 195 | params.pitch = config.pitch; | ||
| 196 | // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters | ||
| 197 | params.target = SurfaceTarget::Texture2D; | ||
| 198 | params.depth = 1; | ||
| 199 | params.num_levels = 1; | ||
| 200 | params.emulated_levels = 1; | ||
| 201 | params.is_layered = params.IsLayered(); | ||
| 202 | return params; | ||
| 203 | } | ||
| 204 | |||
| 205 | bool SurfaceParams::IsLayered() const { | ||
| 206 | switch (target) { | ||
| 207 | case SurfaceTarget::Texture1DArray: | ||
| 208 | case SurfaceTarget::Texture2DArray: | ||
| 209 | case SurfaceTarget::TextureCubemap: | ||
| 210 | case SurfaceTarget::TextureCubeArray: | ||
| 211 | return true; | ||
| 212 | default: | ||
| 213 | return false; | ||
| 214 | } | ||
| 215 | } | ||
| 216 | |||
// Auto block resizing algorithm from:
// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
    if (level == 0) {
        // The base level uses the configured block height verbatim.
        return this->block_height;
    }

    // Shrink the block height to fit the rows of blocks in this level.
    const u32 height_new{GetMipHeight(level)};
    const u32 default_block_height{GetDefaultBlockHeight()};
    const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height};
    const u32 block_height_new = Common::Log2Ceil32(blocks_in_y);
    // Clamp to the [3, 7] range before rebasing to a zero-based log2 value.
    return std::clamp(block_height_new, 3U, 7U) - 3U;
}
| 230 | |||
u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
    if (level == 0) {
        // The base level uses the configured block depth verbatim.
        return this->block_depth;
    }
    if (is_layered) {
        // Layered surfaces do not tile across Z.
        return 0;
    }

    const u32 depth_new{GetMipDepth(level)};
    const u32 block_depth_new = Common::Log2Ceil32(depth_new);
    if (block_depth_new > 4) {
        // Cap at 2^5, or 2^4 when the mip's block height is at least 2^2.
        return 5 - (GetMipBlockHeight(level) >= 2);
    }
    return block_depth_new;
}
| 246 | |||
| 247 | std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { | ||
| 248 | std::size_t offset = 0; | ||
| 249 | for (u32 i = 0; i < level; i++) { | ||
| 250 | offset += GetInnerMipmapMemorySize(i, false, false); | ||
| 251 | } | ||
| 252 | return offset; | ||
| 253 | } | ||
| 254 | |||
| 255 | std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { | ||
| 256 | std::size_t offset = 0; | ||
| 257 | for (u32 i = 0; i < level; i++) { | ||
| 258 | offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers(); | ||
| 259 | } | ||
| 260 | return offset; | ||
| 261 | } | ||
| 262 | |||
| 263 | std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const { | ||
| 264 | std::size_t offset = 0; | ||
| 265 | for (u32 i = 0; i < level; i++) { | ||
| 266 | offset += GetConvertedMipmapSize(i); | ||
| 267 | } | ||
| 268 | return offset; | ||
| 269 | } | ||
| 270 | |||
/// Returns the size in bytes of a mipmap once converted to RGBA8 (4 bytes per texel).
std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const {
    constexpr std::size_t rgba8_bpp = 4ULL;
    const std::size_t width_t = GetMipWidth(level);
    const std::size_t height_t = GetMipHeight(level);
    // Layered surfaces keep every layer in each level; others shrink depth per level.
    const std::size_t depth_t = is_layered ? depth : GetMipDepth(level);
    return width_t * height_t * depth_t * rgba8_bpp;
}
| 278 | |||
/// Returns the size in bytes of one layer, summing every mipmap level.
/// Tiled layered surfaces are padded up to a whole tiling block.
std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const {
    std::size_t size = 0;
    for (u32 level = 0; level < num_levels; ++level) {
        size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed);
    }
    if (is_tiled && is_layered) {
        return Common::AlignBits(size,
                                 Tegra::Texture::GetGOBSizeShift() + block_height + block_depth);
    }
    return size;
}
| 290 | |||
/// Returns the size in bytes of a single mipmap inside one layer.
/// Host sizes are always computed as linear (untiled) memory.
std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
                                                    bool uncompressed) const {
    const bool tiled{as_host_size ? false : is_tiled};
    const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
    const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
    const u32 depth{is_layered ? 1U : GetMipDepth(level)};
    return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth,
                                         GetMipBlockHeight(level), GetMipBlockDepth(level));
}
| 300 | |||
/// Field-wise equality. srgb_conversion, is_layered and emulated_levels are
/// not part of the comparison.
bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
    return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width,
                    height, depth, pitch, num_levels, pixel_format, component_type, type, target) ==
           std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth,
                    rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch,
                    rhs.num_levels, rhs.pixel_format, rhs.component_type, rhs.type, rhs.target);
}
| 308 | |||
/// Returns a short human-readable name for the surface target, used when
/// labelling textures in graphics debuggers.
std::string SurfaceParams::TargetName() const {
    switch (target) {
    case SurfaceTarget::Texture1D:
        return "1D";
    case SurfaceTarget::TextureBuffer:
        return "TexBuffer";
    case SurfaceTarget::Texture2D:
        return "2D";
    case SurfaceTarget::Texture3D:
        return "3D";
    case SurfaceTarget::Texture1DArray:
        return "1DArray";
    case SurfaceTarget::Texture2DArray:
        return "2DArray";
    case SurfaceTarget::TextureCubemap:
        return "Cube";
    case SurfaceTarget::TextureCubeArray:
        return "CubeArray";
    default:
        // Fallback name that still encodes the unknown target value.
        LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
        UNREACHABLE();
        return fmt::format("TUK({})", static_cast<u32>(target));
    }
}
| 333 | |||
| 334 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h new file mode 100644 index 000000000..358d6757c --- /dev/null +++ b/src/video_core/texture_cache/surface_params.h | |||
| @@ -0,0 +1,286 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <map> | ||
| 8 | |||
| 9 | #include "common/alignment.h" | ||
| 10 | #include "common/bit_util.h" | ||
| 11 | #include "common/cityhash.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/engines/fermi_2d.h" | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | ||
| 16 | #include "video_core/surface.h" | ||
| 17 | #include "video_core/textures/decoders.h" | ||
| 18 | |||
| 19 | namespace VideoCommon { | ||
| 20 | |||
| 21 | using VideoCore::Surface::SurfaceCompression; | ||
| 22 | |||
| 23 | class SurfaceParams { | ||
| 24 | public: | ||
| 25 | /// Creates SurfaceCachedParams from a texture configuration. | ||
| 26 | static SurfaceParams CreateForTexture(Core::System& system, | ||
| 27 | const Tegra::Texture::FullTextureInfo& config, | ||
| 28 | const VideoCommon::Shader::Sampler& entry); | ||
| 29 | |||
| 30 | /// Creates SurfaceCachedParams for a depth buffer configuration. | ||
| 31 | static SurfaceParams CreateForDepthBuffer( | ||
| 32 | Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, | ||
| 33 | u32 block_width, u32 block_height, u32 block_depth, | ||
| 34 | Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); | ||
| 35 | |||
| 36 | /// Creates SurfaceCachedParams from a framebuffer configuration. | ||
| 37 | static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index); | ||
| 38 | |||
| 39 | /// Creates SurfaceCachedParams from a Fermi2D surface configuration. | ||
| 40 | static SurfaceParams CreateForFermiCopySurface( | ||
| 41 | const Tegra::Engines::Fermi2D::Regs::Surface& config); | ||
| 42 | |||
| 43 | std::size_t Hash() const { | ||
| 44 | return static_cast<std::size_t>( | ||
| 45 | Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this))); | ||
| 46 | } | ||
| 47 | |||
| 48 | bool operator==(const SurfaceParams& rhs) const; | ||
| 49 | |||
| 50 | bool operator!=(const SurfaceParams& rhs) const { | ||
| 51 | return !operator==(rhs); | ||
| 52 | } | ||
| 53 | |||
| 54 | std::size_t GetGuestSizeInBytes() const { | ||
| 55 | return GetInnerMemorySize(false, false, false); | ||
| 56 | } | ||
| 57 | |||
| 58 | std::size_t GetHostSizeInBytes() const { | ||
| 59 | std::size_t host_size_in_bytes; | ||
| 60 | if (GetCompressionType() == SurfaceCompression::Converted) { | ||
| 61 | constexpr std::size_t rgb8_bpp = 4ULL; | ||
| 62 | // ASTC is uncompressed in software, in emulated as RGBA8 | ||
| 63 | host_size_in_bytes = 0; | ||
| 64 | for (u32 level = 0; level < num_levels; ++level) { | ||
| 65 | host_size_in_bytes += GetConvertedMipmapSize(level); | ||
| 66 | } | ||
| 67 | } else { | ||
| 68 | host_size_in_bytes = GetInnerMemorySize(true, false, false); | ||
| 69 | } | ||
| 70 | return host_size_in_bytes; | ||
| 71 | } | ||
| 72 | |||
| 73 | u32 GetBlockAlignedWidth() const { | ||
| 74 | return Common::AlignUp(width, 64 / GetBytesPerPixel()); | ||
| 75 | } | ||
| 76 | |||
| 77 | /// Returns the width of a given mipmap level. | ||
| 78 | u32 GetMipWidth(u32 level) const { | ||
| 79 | return std::max(1U, width >> level); | ||
| 80 | } | ||
| 81 | |||
| 82 | /// Returns the height of a given mipmap level. | ||
| 83 | u32 GetMipHeight(u32 level) const { | ||
| 84 | return std::max(1U, height >> level); | ||
| 85 | } | ||
| 86 | |||
| 87 | /// Returns the depth of a given mipmap level. | ||
| 88 | u32 GetMipDepth(u32 level) const { | ||
| 89 | return is_layered ? depth : std::max(1U, depth >> level); | ||
| 90 | } | ||
| 91 | |||
| 92 | /// Returns the block height of a given mipmap level. | ||
| 93 | u32 GetMipBlockHeight(u32 level) const; | ||
| 94 | |||
| 95 | /// Returns the block depth of a given mipmap level. | ||
| 96 | u32 GetMipBlockDepth(u32 level) const; | ||
| 97 | |||
| 98 | /// Returns the best possible row/pitch alignment for the surface. | ||
| 99 | u32 GetRowAlignment(u32 level) const { | ||
| 100 | const u32 bpp = | ||
| 101 | GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel(); | ||
| 102 | return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); | ||
| 103 | } | ||
| 104 | |||
| 105 | /// Returns the offset in bytes in guest memory of a given mipmap level. | ||
| 106 | std::size_t GetGuestMipmapLevelOffset(u32 level) const; | ||
| 107 | |||
| 108 | /// Returns the offset in bytes in host memory (linear) of a given mipmap level. | ||
| 109 | std::size_t GetHostMipmapLevelOffset(u32 level) const; | ||
| 110 | |||
| 111 | /// Returns the offset in bytes in host memory (linear) of a given mipmap level | ||
| 112 | /// for a texture that is converted in host gpu. | ||
| 113 | std::size_t GetConvertedMipmapOffset(u32 level) const; | ||
| 114 | |||
| 115 | /// Returns the size in bytes in guest memory of a given mipmap level. | ||
| 116 | std::size_t GetGuestMipmapSize(u32 level) const { | ||
| 117 | return GetInnerMipmapMemorySize(level, false, false); | ||
| 118 | } | ||
| 119 | |||
| 120 | /// Returns the size in bytes in host memory (linear) of a given mipmap level. | ||
| 121 | std::size_t GetHostMipmapSize(u32 level) const { | ||
| 122 | return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); | ||
| 123 | } | ||
| 124 | |||
| 125 | std::size_t GetConvertedMipmapSize(u32 level) const; | ||
| 126 | |||
| 127 | /// Returns the size of a layer in bytes in guest memory. | ||
| 128 | std::size_t GetGuestLayerSize() const { | ||
| 129 | return GetLayerSize(false, false); | ||
| 130 | } | ||
| 131 | |||
| 132 | /// Returns the size of a layer in bytes in host memory for a given mipmap level. | ||
| 133 | std::size_t GetHostLayerSize(u32 level) const { | ||
| 134 | ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D); | ||
| 135 | return GetInnerMipmapMemorySize(level, true, false); | ||
| 136 | } | ||
| 137 | |||
| 138 | /// Returns the max possible mipmap that the texture can have in host gpu | ||
| 139 | u32 MaxPossibleMipmap() const { | ||
| 140 | const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U; | ||
| 141 | const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U; | ||
| 142 | const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h); | ||
| 143 | if (target != VideoCore::Surface::SurfaceTarget::Texture3D) | ||
| 144 | return max_mipmap; | ||
| 145 | return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U); | ||
| 146 | } | ||
| 147 | |||
| 148 | /// Returns if the guest surface is a compressed surface. | ||
| 149 | bool IsCompressed() const { | ||
| 150 | return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1; | ||
| 151 | } | ||
| 152 | |||
| 153 | /// Returns the default block width. | ||
| 154 | u32 GetDefaultBlockWidth() const { | ||
| 155 | return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); | ||
| 156 | } | ||
| 157 | |||
| 158 | /// Returns the default block height. | ||
| 159 | u32 GetDefaultBlockHeight() const { | ||
| 160 | return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); | ||
| 161 | } | ||
| 162 | |||
| 163 | /// Returns the bits per pixel. | ||
| 164 | u32 GetBitsPerPixel() const { | ||
| 165 | return VideoCore::Surface::GetFormatBpp(pixel_format); | ||
| 166 | } | ||
| 167 | |||
| 168 | /// Returns the bytes per pixel. | ||
| 169 | u32 GetBytesPerPixel() const { | ||
| 170 | return VideoCore::Surface::GetBytesPerPixel(pixel_format); | ||
| 171 | } | ||
| 172 | |||
| 173 | /// Returns true if the pixel format is a depth and/or stencil format. | ||
| 174 | bool IsPixelFormatZeta() const { | ||
| 175 | return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && | ||
| 176 | pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; | ||
| 177 | } | ||
| 178 | |||
| 179 | /// Returns how the compression should be handled for this texture. | ||
| 180 | SurfaceCompression GetCompressionType() const { | ||
| 181 | return VideoCore::Surface::GetFormatCompressionType(pixel_format); | ||
| 182 | } | ||
| 183 | |||
| 184 | /// Returns is the surface is a TextureBuffer type of surface. | ||
| 185 | bool IsBuffer() const { | ||
| 186 | return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; | ||
| 187 | } | ||
| 188 | |||
| 189 | /// Returns the debug name of the texture for use in graphic debuggers. | ||
| 190 | std::string TargetName() const; | ||
| 191 | |||
| 192 | // Helper used for out of class size calculations | ||
| 193 | static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, | ||
| 194 | const u32 block_depth) { | ||
| 195 | return Common::AlignBits(out_size, | ||
| 196 | Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); | ||
| 197 | } | ||
| 198 | |||
| 199 | /// Converts a width from a type of surface into another. This helps represent the | ||
| 200 | /// equivalent value between compressed/non-compressed textures. | ||
| 201 | static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, | ||
| 202 | VideoCore::Surface::PixelFormat pixel_format_to) { | ||
| 203 | const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from); | ||
| 204 | const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to); | ||
| 205 | return (width * bw2 + bw1 - 1) / bw1; | ||
| 206 | } | ||
| 207 | |||
| 208 | /// Converts a height from a type of surface into another. This helps represent the | ||
| 209 | /// equivalent value between compressed/non-compressed textures. | ||
| 210 | static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from, | ||
| 211 | VideoCore::Surface::PixelFormat pixel_format_to) { | ||
| 212 | const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from); | ||
| 213 | const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to); | ||
| 214 | return (height * bh2 + bh1 - 1) / bh1; | ||
| 215 | } | ||
| 216 | |||
| 217 | // Finds the maximun possible width between 2 2D layers of different formats | ||
| 218 | static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params, | ||
| 219 | const u32 src_level, const u32 dst_level) { | ||
| 220 | const u32 bw1 = src_params.GetDefaultBlockWidth(); | ||
| 221 | const u32 bw2 = dst_params.GetDefaultBlockWidth(); | ||
| 222 | const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1; | ||
| 223 | const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2; | ||
| 224 | return std::min(t_src_width, t_dst_width); | ||
| 225 | } | ||
| 226 | |||
| 227 | // Finds the maximun possible height between 2 2D layers of different formats | ||
| 228 | static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params, | ||
| 229 | const u32 src_level, const u32 dst_level) { | ||
| 230 | const u32 bh1 = src_params.GetDefaultBlockHeight(); | ||
| 231 | const u32 bh2 = dst_params.GetDefaultBlockHeight(); | ||
| 232 | const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1; | ||
| 233 | const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2; | ||
| 234 | return std::min(t_src_height, t_dst_height); | ||
| 235 | } | ||
| 236 | |||
| 237 | bool is_tiled; | ||
| 238 | bool srgb_conversion; | ||
| 239 | bool is_layered; | ||
| 240 | u32 block_width; | ||
| 241 | u32 block_height; | ||
| 242 | u32 block_depth; | ||
| 243 | u32 tile_width_spacing; | ||
| 244 | u32 width; | ||
| 245 | u32 height; | ||
| 246 | u32 depth; | ||
| 247 | u32 pitch; | ||
| 248 | u32 num_levels; | ||
| 249 | u32 emulated_levels; | ||
| 250 | VideoCore::Surface::PixelFormat pixel_format; | ||
| 251 | VideoCore::Surface::ComponentType component_type; | ||
| 252 | VideoCore::Surface::SurfaceType type; | ||
| 253 | VideoCore::Surface::SurfaceTarget target; | ||
| 254 | |||
| 255 | private: | ||
| 256 | /// Returns the size of a given mipmap level inside a layer. | ||
| 257 | std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; | ||
| 258 | |||
| 259 | /// Returns the size of all mipmap levels and aligns as needed. | ||
| 260 | std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { | ||
| 261 | return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 1U : depth); | ||
| 262 | } | ||
| 263 | |||
| 264 | /// Returns the size of a layer | ||
| 265 | std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; | ||
| 266 | |||
| 267 | std::size_t GetNumLayers() const { | ||
| 268 | return is_layered ? depth : 1; | ||
| 269 | } | ||
| 270 | |||
| 271 | /// Returns true if these parameters are from a layered surface. | ||
| 272 | bool IsLayered() const; | ||
| 273 | }; | ||
| 274 | |||
| 275 | } // namespace VideoCommon | ||
| 276 | |||
namespace std {

/// Enables SurfaceParams as a key in std:: unordered containers.
template <>
struct hash<VideoCommon::SurfaceParams> {
    std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
        return k.Hash();
    }
};

} // namespace std
diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp new file mode 100644 index 000000000..467696a4c --- /dev/null +++ b/src/video_core/texture_cache/surface_view.cpp | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <tuple> | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "video_core/texture_cache/surface_view.h" | ||
| 9 | |||
| 10 | namespace VideoCommon { | ||
| 11 | |||
| 12 | std::size_t ViewParams::Hash() const { | ||
| 13 | return static_cast<std::size_t>(base_layer) ^ static_cast<std::size_t>(num_layers << 16) ^ | ||
| 14 | (static_cast<std::size_t>(base_level) << 24) ^ | ||
| 15 | (static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36); | ||
| 16 | } | ||
| 17 | |||
| 18 | bool ViewParams::operator==(const ViewParams& rhs) const { | ||
| 19 | return std::tie(base_layer, num_layers, base_level, num_levels, target) == | ||
| 20 | std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target); | ||
| 21 | } | ||
| 22 | |||
| 23 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h new file mode 100644 index 000000000..04ca5639b --- /dev/null +++ b/src/video_core/texture_cache/surface_view.h | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <functional> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/surface.h" | ||
| 11 | #include "video_core/texture_cache/surface_params.h" | ||
| 12 | |||
| 13 | namespace VideoCommon { | ||
| 14 | |||
| 15 | struct ViewParams { | ||
| 16 | ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer, u32 num_layers, | ||
| 17 | u32 base_level, u32 num_levels) | ||
| 18 | : target{target}, base_layer{base_layer}, num_layers{num_layers}, base_level{base_level}, | ||
| 19 | num_levels{num_levels} {} | ||
| 20 | |||
| 21 | std::size_t Hash() const; | ||
| 22 | |||
| 23 | bool operator==(const ViewParams& rhs) const; | ||
| 24 | |||
| 25 | VideoCore::Surface::SurfaceTarget target{}; | ||
| 26 | u32 base_layer{}; | ||
| 27 | u32 num_layers{}; | ||
| 28 | u32 base_level{}; | ||
| 29 | u32 num_levels{}; | ||
| 30 | |||
| 31 | bool IsLayered() const { | ||
| 32 | switch (target) { | ||
| 33 | case VideoCore::Surface::SurfaceTarget::Texture1DArray: | ||
| 34 | case VideoCore::Surface::SurfaceTarget::Texture2DArray: | ||
| 35 | case VideoCore::Surface::SurfaceTarget::TextureCubemap: | ||
| 36 | case VideoCore::Surface::SurfaceTarget::TextureCubeArray: | ||
| 37 | return true; | ||
| 38 | default: | ||
| 39 | return false; | ||
| 40 | } | ||
| 41 | } | ||
| 42 | }; | ||
| 43 | |||
| 44 | class ViewBase { | ||
| 45 | public: | ||
| 46 | ViewBase(const ViewParams& params) : params{params} {} | ||
| 47 | |||
| 48 | const ViewParams& GetViewParams() const { | ||
| 49 | return params; | ||
| 50 | } | ||
| 51 | |||
| 52 | protected: | ||
| 53 | ViewParams params; | ||
| 54 | }; | ||
| 55 | |||
| 56 | } // namespace VideoCommon | ||
| 57 | |||
| 58 | namespace std { | ||
| 59 | |||
| 60 | template <> | ||
| 61 | struct hash<VideoCommon::ViewParams> { | ||
| 62 | std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept { | ||
| 63 | return k.Hash(); | ||
| 64 | } | ||
| 65 | }; | ||
| 66 | |||
| 67 | } // namespace std | ||
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h new file mode 100644 index 000000000..c9e72531a --- /dev/null +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -0,0 +1,814 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <array> | ||
| 9 | #include <memory> | ||
| 10 | #include <mutex> | ||
| 11 | #include <set> | ||
| 12 | #include <tuple> | ||
| 13 | #include <unordered_map> | ||
| 14 | #include <vector> | ||
| 15 | |||
| 16 | #include <boost/icl/interval_map.hpp> | ||
| 17 | #include <boost/range/iterator_range.hpp> | ||
| 18 | |||
| 19 | #include "common/assert.h" | ||
| 20 | #include "common/common_types.h" | ||
| 21 | #include "common/math_util.h" | ||
| 22 | #include "core/core.h" | ||
| 23 | #include "core/memory.h" | ||
| 24 | #include "core/settings.h" | ||
| 25 | #include "video_core/engines/fermi_2d.h" | ||
| 26 | #include "video_core/engines/maxwell_3d.h" | ||
| 27 | #include "video_core/gpu.h" | ||
| 28 | #include "video_core/memory_manager.h" | ||
| 29 | #include "video_core/rasterizer_interface.h" | ||
| 30 | #include "video_core/surface.h" | ||
| 31 | #include "video_core/texture_cache/copy_params.h" | ||
| 32 | #include "video_core/texture_cache/surface_base.h" | ||
| 33 | #include "video_core/texture_cache/surface_params.h" | ||
| 34 | #include "video_core/texture_cache/surface_view.h" | ||
| 35 | |||
| 36 | namespace Tegra::Texture { | ||
| 37 | struct FullTextureInfo; | ||
| 38 | } | ||
| 39 | |||
| 40 | namespace VideoCore { | ||
| 41 | class RasterizerInterface; | ||
| 42 | } | ||
| 43 | |||
| 44 | namespace VideoCommon { | ||
| 45 | |||
| 46 | using VideoCore::Surface::PixelFormat; | ||
| 47 | |||
| 48 | using VideoCore::Surface::SurfaceTarget; | ||
| 49 | using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; | ||
| 50 | |||
| 51 | template <typename TSurface, typename TView> | ||
| 52 | class TextureCache { | ||
| 53 | using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>; | ||
| 54 | using IntervalType = typename IntervalMap::interval_type; | ||
| 55 | |||
| 56 | public: | ||
| 57 | void InvalidateRegion(CacheAddr addr, std::size_t size) { | ||
| 58 | std::lock_guard lock{mutex}; | ||
| 59 | |||
| 60 | for (const auto& surface : GetSurfacesInRegion(addr, size)) { | ||
| 61 | Unregister(surface); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | /*** | ||
| 66 | * `Guard` guarantees that render targets don't unregister themselves if they | ||
| 67 | * collide. Protection is currently only done on 3D slices. | ||
| 68 | ***/ | ||
| 69 | void GuardRenderTargets(bool new_guard) { | ||
| 70 | guard_render_targets = new_guard; | ||
| 71 | } | ||
| 72 | |||
| 73 | void GuardSamplers(bool new_guard) { | ||
| 74 | guard_samplers = new_guard; | ||
| 75 | } | ||
| 76 | |||
| 77 | void FlushRegion(CacheAddr addr, std::size_t size) { | ||
| 78 | std::lock_guard lock{mutex}; | ||
| 79 | |||
| 80 | auto surfaces = GetSurfacesInRegion(addr, size); | ||
| 81 | if (surfaces.empty()) { | ||
| 82 | return; | ||
| 83 | } | ||
| 84 | std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) { | ||
| 85 | return a->GetModificationTick() < b->GetModificationTick(); | ||
| 86 | }); | ||
| 87 | for (const auto& surface : surfaces) { | ||
| 88 | FlushSurface(surface); | ||
| 89 | } | ||
| 90 | } | ||
| 91 | |||
| 92 | TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, | ||
| 93 | const VideoCommon::Shader::Sampler& entry) { | ||
| 94 | std::lock_guard lock{mutex}; | ||
| 95 | const auto gpu_addr{config.tic.Address()}; | ||
| 96 | if (!gpu_addr) { | ||
| 97 | return {}; | ||
| 98 | } | ||
| 99 | const auto params{SurfaceParams::CreateForTexture(system, config, entry)}; | ||
| 100 | const auto [surface, view] = GetSurface(gpu_addr, params, true, false); | ||
| 101 | if (guard_samplers) { | ||
| 102 | sampled_textures.push_back(surface); | ||
| 103 | } | ||
| 104 | return view; | ||
| 105 | } | ||
| 106 | |||
| 107 | bool TextureBarrier() { | ||
| 108 | const bool any_rt = | ||
| 109 | std::any_of(sampled_textures.begin(), sampled_textures.end(), | ||
| 110 | [](const auto& surface) { return surface->IsRenderTarget(); }); | ||
| 111 | sampled_textures.clear(); | ||
| 112 | return any_rt; | ||
| 113 | } | ||
| 114 | |||
| 115 | TView GetDepthBufferSurface(bool preserve_contents) { | ||
| 116 | std::lock_guard lock{mutex}; | ||
| 117 | auto& maxwell3d = system.GPU().Maxwell3D(); | ||
| 118 | |||
| 119 | if (!maxwell3d.dirty_flags.zeta_buffer) { | ||
| 120 | return depth_buffer.view; | ||
| 121 | } | ||
| 122 | maxwell3d.dirty_flags.zeta_buffer = false; | ||
| 123 | |||
| 124 | const auto& regs{maxwell3d.regs}; | ||
| 125 | const auto gpu_addr{regs.zeta.Address()}; | ||
| 126 | if (!gpu_addr || !regs.zeta_enable) { | ||
| 127 | SetEmptyDepthBuffer(); | ||
| 128 | return {}; | ||
| 129 | } | ||
| 130 | const auto depth_params{SurfaceParams::CreateForDepthBuffer( | ||
| 131 | system, regs.zeta_width, regs.zeta_height, regs.zeta.format, | ||
| 132 | regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, | ||
| 133 | regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; | ||
| 134 | auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); | ||
| 135 | if (depth_buffer.target) | ||
| 136 | depth_buffer.target->MarkAsRenderTarget(false); | ||
| 137 | depth_buffer.target = surface_view.first; | ||
| 138 | depth_buffer.view = surface_view.second; | ||
| 139 | if (depth_buffer.target) | ||
| 140 | depth_buffer.target->MarkAsRenderTarget(true); | ||
| 141 | return surface_view.second; | ||
| 142 | } | ||
| 143 | |||
| 144 | TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { | ||
| 145 | std::lock_guard lock{mutex}; | ||
| 146 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | ||
| 147 | auto& maxwell3d = system.GPU().Maxwell3D(); | ||
| 148 | if (!maxwell3d.dirty_flags.color_buffer[index]) { | ||
| 149 | return render_targets[index].view; | ||
| 150 | } | ||
| 151 | maxwell3d.dirty_flags.color_buffer.reset(index); | ||
| 152 | |||
| 153 | const auto& regs{maxwell3d.regs}; | ||
| 154 | if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || | ||
| 155 | regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { | ||
| 156 | SetEmptyColorBuffer(index); | ||
| 157 | return {}; | ||
| 158 | } | ||
| 159 | |||
| 160 | const auto& config{regs.rt[index]}; | ||
| 161 | const auto gpu_addr{config.Address()}; | ||
| 162 | if (!gpu_addr) { | ||
| 163 | SetEmptyColorBuffer(index); | ||
| 164 | return {}; | ||
| 165 | } | ||
| 166 | |||
| 167 | auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), | ||
| 168 | preserve_contents, true); | ||
| 169 | if (render_targets[index].target) | ||
| 170 | render_targets[index].target->MarkAsRenderTarget(false); | ||
| 171 | render_targets[index].target = surface_view.first; | ||
| 172 | render_targets[index].view = surface_view.second; | ||
| 173 | if (render_targets[index].target) | ||
| 174 | render_targets[index].target->MarkAsRenderTarget(true); | ||
| 175 | return surface_view.second; | ||
| 176 | } | ||
| 177 | |||
| 178 | void MarkColorBufferInUse(std::size_t index) { | ||
| 179 | if (auto& render_target = render_targets[index].target) { | ||
| 180 | render_target->MarkAsModified(true, Tick()); | ||
| 181 | } | ||
| 182 | } | ||
| 183 | |||
| 184 | void MarkDepthBufferInUse() { | ||
| 185 | if (depth_buffer.target) { | ||
| 186 | depth_buffer.target->MarkAsModified(true, Tick()); | ||
| 187 | } | ||
| 188 | } | ||
| 189 | |||
| 190 | void SetEmptyDepthBuffer() { | ||
| 191 | if (depth_buffer.target == nullptr) { | ||
| 192 | return; | ||
| 193 | } | ||
| 194 | depth_buffer.target->MarkAsRenderTarget(false); | ||
| 195 | depth_buffer.target = nullptr; | ||
| 196 | depth_buffer.view = nullptr; | ||
| 197 | } | ||
| 198 | |||
| 199 | void SetEmptyColorBuffer(std::size_t index) { | ||
| 200 | if (render_targets[index].target == nullptr) { | ||
| 201 | return; | ||
| 202 | } | ||
| 203 | render_targets[index].target->MarkAsRenderTarget(false); | ||
| 204 | render_targets[index].target = nullptr; | ||
| 205 | render_targets[index].view = nullptr; | ||
| 206 | } | ||
| 207 | |||
| 208 | void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, | ||
| 209 | const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, | ||
| 210 | const Tegra::Engines::Fermi2D::Config& copy_config) { | ||
| 211 | std::lock_guard lock{mutex}; | ||
| 212 | std::pair<TSurface, TView> dst_surface = GetFermiSurface(dst_config); | ||
| 213 | std::pair<TSurface, TView> src_surface = GetFermiSurface(src_config); | ||
| 214 | ImageBlit(src_surface.second, dst_surface.second, copy_config); | ||
| 215 | dst_surface.first->MarkAsModified(true, Tick()); | ||
| 216 | } | ||
| 217 | |||
| 218 | TSurface TryFindFramebufferSurface(const u8* host_ptr) { | ||
| 219 | const CacheAddr cache_addr = ToCacheAddr(host_ptr); | ||
| 220 | if (!cache_addr) { | ||
| 221 | return nullptr; | ||
| 222 | } | ||
| 223 | const CacheAddr page = cache_addr >> registry_page_bits; | ||
| 224 | std::vector<TSurface>& list = registry[page]; | ||
| 225 | for (auto& surface : list) { | ||
| 226 | if (surface->GetCacheAddr() == cache_addr) { | ||
| 227 | return surface; | ||
| 228 | } | ||
| 229 | } | ||
| 230 | return nullptr; | ||
| 231 | } | ||
| 232 | |||
| 233 | u64 Tick() { | ||
| 234 | return ++ticks; | ||
| 235 | } | ||
| 236 | |||
| 237 | protected: | ||
| 238 | TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) | ||
| 239 | : system{system}, rasterizer{rasterizer} { | ||
| 240 | for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { | ||
| 241 | SetEmptyColorBuffer(i); | ||
| 242 | } | ||
| 243 | |||
| 244 | SetEmptyDepthBuffer(); | ||
| 245 | staging_cache.SetSize(2); | ||
| 246 | |||
| 247 | const auto make_siblings = [this](PixelFormat a, PixelFormat b) { | ||
| 248 | siblings_table[static_cast<std::size_t>(a)] = b; | ||
| 249 | siblings_table[static_cast<std::size_t>(b)] = a; | ||
| 250 | }; | ||
| 251 | std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid); | ||
| 252 | make_siblings(PixelFormat::Z16, PixelFormat::R16U); | ||
| 253 | make_siblings(PixelFormat::Z32F, PixelFormat::R32F); | ||
| 254 | make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F); | ||
| 255 | |||
| 256 | sampled_textures.reserve(64); | ||
| 257 | } | ||
| 258 | |||
| 259 | ~TextureCache() = default; | ||
| 260 | |||
| 261 | virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0; | ||
| 262 | |||
| 263 | virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface, | ||
| 264 | const CopyParams& copy_params) = 0; | ||
| 265 | |||
| 266 | virtual void ImageBlit(TView& src_view, TView& dst_view, | ||
| 267 | const Tegra::Engines::Fermi2D::Config& copy_config) = 0; | ||
| 268 | |||
| 269 | // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture | ||
| 270 | // and reading it from a separate buffer. | ||
| 271 | virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; | ||
| 272 | |||
| 273 | void Register(TSurface surface) { | ||
| 274 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); | ||
| 275 | const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); | ||
| 276 | const std::size_t size = surface->GetSizeInBytes(); | ||
| 277 | const std::optional<VAddr> cpu_addr = | ||
| 278 | system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); | ||
| 279 | if (!cache_ptr || !cpu_addr) { | ||
| 280 | LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", | ||
| 281 | gpu_addr); | ||
| 282 | return; | ||
| 283 | } | ||
| 284 | const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); | ||
| 285 | surface->MarkAsContinuous(continuous); | ||
| 286 | surface->SetCacheAddr(cache_ptr); | ||
| 287 | surface->SetCpuAddr(*cpu_addr); | ||
| 288 | RegisterInnerCache(surface); | ||
| 289 | surface->MarkAsRegistered(true); | ||
| 290 | rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); | ||
| 291 | } | ||
| 292 | |||
| 293 | void Unregister(TSurface surface) { | ||
| 294 | if (guard_render_targets && surface->IsProtected()) { | ||
| 295 | return; | ||
| 296 | } | ||
| 297 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); | ||
| 298 | const CacheAddr cache_ptr = surface->GetCacheAddr(); | ||
| 299 | const std::size_t size = surface->GetSizeInBytes(); | ||
| 300 | const VAddr cpu_addr = surface->GetCpuAddr(); | ||
| 301 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | ||
| 302 | UnregisterInnerCache(surface); | ||
| 303 | surface->MarkAsRegistered(false); | ||
| 304 | ReserveSurface(surface->GetSurfaceParams(), surface); | ||
| 305 | } | ||
| 306 | |||
| 307 | TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { | ||
| 308 | if (const auto surface = TryGetReservedSurface(params); surface) { | ||
| 309 | surface->SetGpuAddr(gpu_addr); | ||
| 310 | return surface; | ||
| 311 | } | ||
| 312 | // No reserved surface available, create a new one and reserve it | ||
| 313 | auto new_surface{CreateSurface(gpu_addr, params)}; | ||
| 314 | return new_surface; | ||
| 315 | } | ||
| 316 | |||
| 317 | std::pair<TSurface, TView> GetFermiSurface( | ||
| 318 | const Tegra::Engines::Fermi2D::Regs::Surface& config) { | ||
| 319 | SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config); | ||
| 320 | const GPUVAddr gpu_addr = config.Address(); | ||
| 321 | return GetSurface(gpu_addr, params, true, false); | ||
| 322 | } | ||
| 323 | |||
| 324 | Core::System& system; | ||
| 325 | |||
| 326 | private: | ||
| 327 | enum class RecycleStrategy : u32 { | ||
| 328 | Ignore = 0, | ||
| 329 | Flush = 1, | ||
| 330 | BufferCopy = 3, | ||
| 331 | }; | ||
| 332 | |||
| 333 | /** | ||
| 334 | * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle. | ||
| 335 | * @param overlaps, the overlapping surfaces registered in the cache. | ||
| 336 | * @param params, the parameters on the new surface. | ||
| 337 | * @param gpu_addr, the starting address of the new surface. | ||
| 338 | * @param untopological, tells the recycler that the texture has no way to match the overlaps | ||
| 339 | * due to topological reasons. | ||
| 340 | **/ | ||
| 341 | RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, | ||
| 342 | const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { | ||
| 343 | if (Settings::values.use_accurate_gpu_emulation) { | ||
| 344 | return RecycleStrategy::Flush; | ||
| 345 | } | ||
| 346 | // 3D Textures decision | ||
| 347 | if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { | ||
| 348 | return RecycleStrategy::Flush; | ||
| 349 | } | ||
| 350 | for (auto s : overlaps) { | ||
| 351 | const auto& s_params = s->GetSurfaceParams(); | ||
| 352 | if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { | ||
| 353 | return RecycleStrategy::Flush; | ||
| 354 | } | ||
| 355 | } | ||
| 356 | // Untopological decision | ||
| 357 | if (untopological == MatchTopologyResult::CompressUnmatch) { | ||
| 358 | return RecycleStrategy::Flush; | ||
| 359 | } | ||
| 360 | if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) { | ||
| 361 | return RecycleStrategy::Flush; | ||
| 362 | } | ||
| 363 | return RecycleStrategy::Ignore; | ||
| 364 | } | ||
| 365 | |||
| 366 | /** | ||
| 367 | * `RecycleSurface` is a method we use to decide what to do with textures we can't resolve in | ||
| 368 | * the cache. It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters all the | ||
| 369 | * overlaps and loads the new texture. Flush flushes all the overlaps into memory and loads the | ||
| 370 | * new surface from that data. | ||
| 371 | * @param overlaps, the overlapping surfaces registered in the cache. | ||
| 372 | * @param params, the parameters on the new surface. | ||
| 373 | * @param gpu_addr, the starting address of the new surface. | ||
| 374 | * @param preserve_contents, tells if the new surface should be loaded from memory or left blank | ||
| 375 | * @param untopological, tells the recycler that the texture has no way to match the overlaps | ||
| 376 | * due to topological reasons. | ||
| 377 | **/ | ||
| 378 | std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, | ||
| 379 | const SurfaceParams& params, const GPUVAddr gpu_addr, | ||
| 380 | const bool preserve_contents, | ||
| 381 | const MatchTopologyResult untopological) { | ||
| 382 | const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation; | ||
| 383 | for (auto& surface : overlaps) { | ||
| 384 | Unregister(surface); | ||
| 385 | } | ||
| 386 | switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { | ||
| 387 | case RecycleStrategy::Ignore: { | ||
| 388 | return InitializeSurface(gpu_addr, params, do_load); | ||
| 389 | } | ||
| 390 | case RecycleStrategy::Flush: { | ||
| 391 | std::sort(overlaps.begin(), overlaps.end(), | ||
| 392 | [](const TSurface& a, const TSurface& b) -> bool { | ||
| 393 | return a->GetModificationTick() < b->GetModificationTick(); | ||
| 394 | }); | ||
| 395 | for (auto& surface : overlaps) { | ||
| 396 | FlushSurface(surface); | ||
| 397 | } | ||
| 398 | return InitializeSurface(gpu_addr, params, preserve_contents); | ||
| 399 | } | ||
| 400 | case RecycleStrategy::BufferCopy: { | ||
| 401 | auto new_surface = GetUncachedSurface(gpu_addr, params); | ||
| 402 | BufferCopy(overlaps[0], new_surface); | ||
| 403 | return {new_surface, new_surface->GetMainView()}; | ||
| 404 | } | ||
| 405 | default: { | ||
| 406 | UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); | ||
| 407 | return InitializeSurface(gpu_addr, params, do_load); | ||
| 408 | } | ||
| 409 | } | ||
| 410 | } | ||
| 411 | |||
| 412 | /** | ||
| 413 | * `RebuildSurface` this method takes a single surface and recreates it into another that | ||
| 414 | * may differ in format, target or width alignment. | ||
| 415 | * @param current_surface, the registered surface in the cache which we want to convert. | ||
| 416 | * @param params, the new surface params which we'll use to recreate the surface. | ||
| 417 | **/ | ||
| 418 | std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params, | ||
| 419 | bool is_render) { | ||
| 420 | const auto gpu_addr = current_surface->GetGpuAddr(); | ||
| 421 | const auto& cr_params = current_surface->GetSurfaceParams(); | ||
| 422 | TSurface new_surface; | ||
| 423 | if (cr_params.pixel_format != params.pixel_format && !is_render && | ||
| 424 | GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) { | ||
| 425 | SurfaceParams new_params = params; | ||
| 426 | new_params.pixel_format = cr_params.pixel_format; | ||
| 427 | new_params.component_type = cr_params.component_type; | ||
| 428 | new_params.type = cr_params.type; | ||
| 429 | new_surface = GetUncachedSurface(gpu_addr, new_params); | ||
| 430 | } else { | ||
| 431 | new_surface = GetUncachedSurface(gpu_addr, params); | ||
| 432 | } | ||
| 433 | const auto& final_params = new_surface->GetSurfaceParams(); | ||
| 434 | if (cr_params.type != final_params.type || | ||
| 435 | (cr_params.component_type != final_params.component_type)) { | ||
| 436 | BufferCopy(current_surface, new_surface); | ||
| 437 | } else { | ||
| 438 | std::vector<CopyParams> bricks = current_surface->BreakDown(final_params); | ||
| 439 | for (auto& brick : bricks) { | ||
| 440 | ImageCopy(current_surface, new_surface, brick); | ||
| 441 | } | ||
| 442 | } | ||
| 443 | Unregister(current_surface); | ||
| 444 | Register(new_surface); | ||
| 445 | new_surface->MarkAsModified(current_surface->IsModified(), Tick()); | ||
| 446 | return {new_surface, new_surface->GetMainView()}; | ||
| 447 | } | ||
| 448 | |||
| 449 | /** | ||
| 450 | * `ManageStructuralMatch` this method takes a single surface and checks with the new surface's | ||
| 451 | * params if it's an exact match, we return the main view of the registered surface. If its | ||
| 452 | * formats don't match, we rebuild the surface. We call this last method a `Mirage`. If formats | ||
| 453 | * match but the targets don't, we create an overview View of the registered surface. | ||
| 454 | * @param current_surface, the registered surface in the cache which we want to convert. | ||
| 455 | * @param params, the new surface params which we want to check. | ||
| 456 | **/ | ||
| 457 | std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface, | ||
| 458 | const SurfaceParams& params, bool is_render) { | ||
| 459 | const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); | ||
| 460 | const bool matches_target = current_surface->MatchTarget(params.target); | ||
| 461 | const auto match_check = [&]() -> std::pair<TSurface, TView> { | ||
| 462 | if (matches_target) { | ||
| 463 | return {current_surface, current_surface->GetMainView()}; | ||
| 464 | } | ||
| 465 | return {current_surface, current_surface->EmplaceOverview(params)}; | ||
| 466 | }; | ||
| 467 | if (!is_mirage) { | ||
| 468 | return match_check(); | ||
| 469 | } | ||
| 470 | if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) { | ||
| 471 | return match_check(); | ||
| 472 | } | ||
| 473 | return RebuildSurface(current_surface, params, is_render); | ||
| 474 | } | ||
| 475 | |||
| 476 | /** | ||
| 477 | * `TryReconstructSurface` unlike `RebuildSurface` where we know the registered surface | ||
| 478 | * matches the candidate in some way, we got no guarantees here. We try to see if the overlaps | ||
| 479 | * are sublayers/mipmaps of the new surface, if they all match we end up recreating a surface | ||
| 480 | * for them, else we return nothing. | ||
| 481 | * @param overlaps, the overlapping surfaces registered in the cache. | ||
| 482 | * @param params, the parameters on the new surface. | ||
| 483 | * @param gpu_addr, the starting address of the new surface. | ||
| 484 | **/ | ||
| 485 | std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, | ||
| 486 | const SurfaceParams& params, | ||
| 487 | const GPUVAddr gpu_addr) { | ||
| 488 | if (params.target == SurfaceTarget::Texture3D) { | ||
| 489 | return {}; | ||
| 490 | } | ||
| 491 | bool modified = false; | ||
| 492 | TSurface new_surface = GetUncachedSurface(gpu_addr, params); | ||
| 493 | u32 passed_tests = 0; | ||
| 494 | for (auto& surface : overlaps) { | ||
| 495 | const SurfaceParams& src_params = surface->GetSurfaceParams(); | ||
| 496 | if (src_params.is_layered || src_params.num_levels > 1) { | ||
| 497 | // We send these cases to recycle as they are more complex to handle | ||
| 498 | return {}; | ||
| 499 | } | ||
| 500 | const std::size_t candidate_size = surface->GetSizeInBytes(); | ||
| 501 | auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; | ||
| 502 | if (!mipmap_layer) { | ||
| 503 | continue; | ||
| 504 | } | ||
| 505 | const auto [layer, mipmap] = *mipmap_layer; | ||
| 506 | if (new_surface->GetMipmapSize(mipmap) != candidate_size) { | ||
| 507 | continue; | ||
| 508 | } | ||
| 509 | modified |= surface->IsModified(); | ||
| 510 | // Now we got all the data set up | ||
| 511 | const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); | ||
| 512 | const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); | ||
| 513 | const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1); | ||
| 514 | passed_tests++; | ||
| 515 | ImageCopy(surface, new_surface, copy_params); | ||
| 516 | } | ||
| 517 | if (passed_tests == 0) { | ||
| 518 | return {}; | ||
| 519 | // In Accurate GPU all tests should pass, else we recycle | ||
| 520 | } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { | ||
| 521 | return {}; | ||
| 522 | } | ||
| 523 | for (auto surface : overlaps) { | ||
| 524 | Unregister(surface); | ||
| 525 | } | ||
| 526 | new_surface->MarkAsModified(modified, Tick()); | ||
| 527 | Register(new_surface); | ||
| 528 | return {{new_surface, new_surface->GetMainView()}}; | ||
| 529 | } | ||
| 530 | |||
| 531 | /** | ||
| 532 | * `GetSurface` gets the starting address and parameters of a candidate surface and tries | ||
| 533 | * to find a matching surface within the cache. This is done in 3 big steps. The first is to | ||
| 534 | * check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. | ||
| 535 | * Step 2 is checking if there are any overlaps at all, if none, we just load the texture from | ||
| 536 | * memory else we move to step 3. Step 3 consists on figuring the relationship between the | ||
| 537 | * candidate texture and the overlaps. We divide the scenarios depending if there's 1 or many | ||
| 538 | * overlaps. If there's many, we just try to reconstruct a new surface out of them based on the | ||
| 539 | * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we have to | ||
| 540 | * check if the candidate is a view (layer/mipmap) of the overlap or if the registered surface | ||
| 541 | * is a mipmap/layer of the candidate. In this last case we reconstruct a new surface. | ||
| 542 | * @param gpu_addr, the starting address of the candidate surface. | ||
| 543 | * @param params, the parameters on the candidate surface. | ||
| 544 | * @param preserve_contents, tells if the new surface should be loaded from memory or left blank. | ||
| 545 | **/ | ||
| 546 | std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, | ||
| 547 | bool preserve_contents, bool is_render) { | ||
| 548 | const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; | ||
| 549 | const auto cache_addr{ToCacheAddr(host_ptr)}; | ||
| 550 | |||
| 551 | // Step 0: guarantee a valid surface | ||
| 552 | if (!cache_addr) { | ||
| 553 | // Return a null surface if it's invalid | ||
| 554 | SurfaceParams new_params = params; | ||
| 555 | new_params.width = 1; | ||
| 556 | new_params.height = 1; | ||
| 557 | new_params.depth = 1; | ||
| 558 | new_params.block_height = 0; | ||
| 559 | new_params.block_depth = 0; | ||
| 560 | return InitializeSurface(gpu_addr, new_params, false); | ||
| 561 | } | ||
| 562 | |||
| 563 | // Step 1 | ||
| 564 | // Check Level 1 Cache for a fast structural match. If candidate surface | ||
| 565 | // matches at certain level we are pretty much done. | ||
| 566 | if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { | ||
| 567 | TSurface& current_surface = iter->second; | ||
| 568 | const auto topological_result = current_surface->MatchesTopology(params); | ||
| 569 | if (topological_result != MatchTopologyResult::FullMatch) { | ||
| 570 | std::vector<TSurface> overlaps{current_surface}; | ||
| 571 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 572 | topological_result); | ||
| 573 | } | ||
| 574 | const auto struct_result = current_surface->MatchesStructure(params); | ||
| 575 | if (struct_result != MatchStructureResult::None && | ||
| 576 | (params.target != SurfaceTarget::Texture3D || | ||
| 577 | current_surface->MatchTarget(params.target))) { | ||
| 578 | if (struct_result == MatchStructureResult::FullMatch) { | ||
| 579 | return ManageStructuralMatch(current_surface, params, is_render); | ||
| 580 | } else { | ||
| 581 | return RebuildSurface(current_surface, params, is_render); | ||
| 582 | } | ||
| 583 | } | ||
| 584 | } | ||
| 585 | |||
| 586 | // Step 2 | ||
| 587 | // Obtain all possible overlaps in the memory region | ||
| 588 | const std::size_t candidate_size = params.GetGuestSizeInBytes(); | ||
| 589 | auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; | ||
| 590 | |||
| 591 | // If none are found, we are done. we just load the surface and create it. | ||
| 592 | if (overlaps.empty()) { | ||
| 593 | return InitializeSurface(gpu_addr, params, preserve_contents); | ||
| 594 | } | ||
| 595 | |||
| 596 | // Step 3 | ||
| 597 | // Now we need to figure the relationship between the texture and its overlaps | ||
| 598 | // we do a topological test to ensure we can find some relationship. If it fails | ||
| 599 | // immediately recycle the texture | ||
| 600 | for (const auto& surface : overlaps) { | ||
| 601 | const auto topological_result = surface->MatchesTopology(params); | ||
| 602 | if (topological_result != MatchTopologyResult::FullMatch) { | ||
| 603 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 604 | topological_result); | ||
| 605 | } | ||
| 606 | } | ||
| 607 | |||
| 608 | // Split cases between 1 overlap or many. | ||
| 609 | if (overlaps.size() == 1) { | ||
| 610 | TSurface current_surface = overlaps[0]; | ||
| 611 | // First check if the surface is within the overlap. If not, it means | ||
| 612 | // two things either the candidate surface is a supertexture of the overlap | ||
| 613 | // or they don't match in any known way. | ||
| 614 | if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { | ||
| 615 | if (current_surface->GetGpuAddr() == gpu_addr) { | ||
| 616 | std::optional<std::pair<TSurface, TView>> view = | ||
| 617 | TryReconstructSurface(overlaps, params, gpu_addr); | ||
| 618 | if (view) { | ||
| 619 | return *view; | ||
| 620 | } | ||
| 621 | } | ||
| 622 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 623 | MatchTopologyResult::FullMatch); | ||
| 624 | } | ||
| 625 | // Now we check if the candidate is a mipmap/layer of the overlap | ||
| 626 | std::optional<TView> view = | ||
| 627 | current_surface->EmplaceView(params, gpu_addr, candidate_size); | ||
| 628 | if (view) { | ||
| 629 | const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); | ||
| 630 | if (is_mirage) { | ||
| 631 | // On a mirage view, we need to recreate the surface under this new view | ||
| 632 | // and then obtain a view again. | ||
| 633 | SurfaceParams new_params = current_surface->GetSurfaceParams(); | ||
| 634 | const u32 wh = SurfaceParams::ConvertWidth( | ||
| 635 | new_params.width, new_params.pixel_format, params.pixel_format); | ||
| 636 | const u32 hh = SurfaceParams::ConvertHeight( | ||
| 637 | new_params.height, new_params.pixel_format, params.pixel_format); | ||
| 638 | new_params.width = wh; | ||
| 639 | new_params.height = hh; | ||
| 640 | new_params.pixel_format = params.pixel_format; | ||
| 641 | std::pair<TSurface, TView> pair = | ||
| 642 | RebuildSurface(current_surface, new_params, is_render); | ||
| 643 | std::optional<TView> mirage_view = | ||
| 644 | pair.first->EmplaceView(params, gpu_addr, candidate_size); | ||
| 645 | if (mirage_view) | ||
| 646 | return {pair.first, *mirage_view}; | ||
| 647 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 648 | MatchTopologyResult::FullMatch); | ||
| 649 | } | ||
| 650 | return {current_surface, *view}; | ||
| 651 | } | ||
| 652 | // The next case is unsafe, so if we r in accurate GPU, just skip it | ||
| 653 | if (Settings::values.use_accurate_gpu_emulation) { | ||
| 654 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 655 | MatchTopologyResult::FullMatch); | ||
| 656 | } | ||
| 657 | // This is the case the texture is a part of the parent. | ||
| 658 | if (current_surface->MatchesSubTexture(params, gpu_addr)) { | ||
| 659 | return RebuildSurface(current_surface, params, is_render); | ||
| 660 | } | ||
| 661 | } else { | ||
| 662 | // If there are many overlaps, odds are they are subtextures of the candidate | ||
| 663 | // surface. We try to construct a new surface based on the candidate parameters, | ||
| 664 | // using the overlaps. If a single overlap fails, this will fail. | ||
| 665 | std::optional<std::pair<TSurface, TView>> view = | ||
| 666 | TryReconstructSurface(overlaps, params, gpu_addr); | ||
| 667 | if (view) { | ||
| 668 | return *view; | ||
| 669 | } | ||
| 670 | } | ||
| 671 | // We failed all the tests, recycle the overlaps into a new texture. | ||
| 672 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 673 | MatchTopologyResult::FullMatch); | ||
| 674 | } | ||
| 675 | |||
| 676 | std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, | ||
| 677 | bool preserve_contents) { | ||
| 678 | auto new_surface{GetUncachedSurface(gpu_addr, params)}; | ||
| 679 | Register(new_surface); | ||
| 680 | if (preserve_contents) { | ||
| 681 | LoadSurface(new_surface); | ||
| 682 | } | ||
| 683 | return {new_surface, new_surface->GetMainView()}; | ||
| 684 | } | ||
| 685 | |||
| 686 | void LoadSurface(const TSurface& surface) { | ||
| 687 | staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); | ||
| 688 | surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache); | ||
| 689 | surface->UploadTexture(staging_cache.GetBuffer(0)); | ||
| 690 | surface->MarkAsModified(false, Tick()); | ||
| 691 | } | ||
| 692 | |||
| 693 | void FlushSurface(const TSurface& surface) { | ||
| 694 | if (!surface->IsModified()) { | ||
| 695 | return; | ||
| 696 | } | ||
| 697 | staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); | ||
| 698 | surface->DownloadTexture(staging_cache.GetBuffer(0)); | ||
| 699 | surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache); | ||
| 700 | surface->MarkAsModified(false, Tick()); | ||
| 701 | } | ||
| 702 | |||
| 703 | void RegisterInnerCache(TSurface& surface) { | ||
| 704 | const CacheAddr cache_addr = surface->GetCacheAddr(); | ||
| 705 | CacheAddr start = cache_addr >> registry_page_bits; | ||
| 706 | const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; | ||
| 707 | l1_cache[cache_addr] = surface; | ||
| 708 | while (start <= end) { | ||
| 709 | registry[start].push_back(surface); | ||
| 710 | start++; | ||
| 711 | } | ||
| 712 | } | ||
| 713 | |||
| 714 | void UnregisterInnerCache(TSurface& surface) { | ||
| 715 | const CacheAddr cache_addr = surface->GetCacheAddr(); | ||
| 716 | CacheAddr start = cache_addr >> registry_page_bits; | ||
| 717 | const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; | ||
| 718 | l1_cache.erase(cache_addr); | ||
| 719 | while (start <= end) { | ||
| 720 | auto& reg{registry[start]}; | ||
| 721 | reg.erase(std::find(reg.begin(), reg.end(), surface)); | ||
| 722 | start++; | ||
| 723 | } | ||
| 724 | } | ||
| 725 | |||
| 726 | std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) { | ||
| 727 | if (size == 0) { | ||
| 728 | return {}; | ||
| 729 | } | ||
| 730 | const CacheAddr cache_addr_end = cache_addr + size; | ||
| 731 | CacheAddr start = cache_addr >> registry_page_bits; | ||
| 732 | const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; | ||
| 733 | std::vector<TSurface> surfaces; | ||
| 734 | while (start <= end) { | ||
| 735 | std::vector<TSurface>& list = registry[start]; | ||
| 736 | for (auto& surface : list) { | ||
| 737 | if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) { | ||
| 738 | surface->MarkAsPicked(true); | ||
| 739 | surfaces.push_back(surface); | ||
| 740 | } | ||
| 741 | } | ||
| 742 | start++; | ||
| 743 | } | ||
| 744 | for (auto& surface : surfaces) { | ||
| 745 | surface->MarkAsPicked(false); | ||
| 746 | } | ||
| 747 | return surfaces; | ||
| 748 | } | ||
| 749 | |||
| 750 | void ReserveSurface(const SurfaceParams& params, TSurface surface) { | ||
| 751 | surface_reserve[params].push_back(std::move(surface)); | ||
| 752 | } | ||
| 753 | |||
| 754 | TSurface TryGetReservedSurface(const SurfaceParams& params) { | ||
| 755 | auto search{surface_reserve.find(params)}; | ||
| 756 | if (search == surface_reserve.end()) { | ||
| 757 | return {}; | ||
| 758 | } | ||
| 759 | for (auto& surface : search->second) { | ||
| 760 | if (!surface->IsRegistered()) { | ||
| 761 | return surface; | ||
| 762 | } | ||
| 763 | } | ||
| 764 | return {}; | ||
| 765 | } | ||
| 766 | |||
| 767 | constexpr PixelFormat GetSiblingFormat(PixelFormat format) const { | ||
| 768 | return siblings_table[static_cast<std::size_t>(format)]; | ||
| 769 | } | ||
| 770 | |||
| 771 | struct FramebufferTargetInfo { | ||
| 772 | TSurface target; | ||
| 773 | TView view; | ||
| 774 | }; | ||
| 775 | |||
| 776 | VideoCore::RasterizerInterface& rasterizer; | ||
| 777 | |||
| 778 | u64 ticks{}; | ||
| 779 | |||
| 780 | // Guards the cache for protection conflicts. | ||
| 781 | bool guard_render_targets{}; | ||
| 782 | bool guard_samplers{}; | ||
| 783 | |||
| 784 | // The siblings table is for formats that can inter exchange with one another | ||
| 785 | // without causing issues. This is only valid when a conflict occurs on a non | ||
| 786 | // rendering use. | ||
| 787 | std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table; | ||
| 788 | |||
| 789 | // The internal Cache is different for the Texture Cache. It's based on buckets | ||
| 790 | // of 1MB. This fits better for the purpose of this cache as textures are normaly | ||
| 791 | // large in size. | ||
| 792 | static constexpr u64 registry_page_bits{20}; | ||
| 793 | static constexpr u64 registry_page_size{1 << registry_page_bits}; | ||
| 794 | std::unordered_map<CacheAddr, std::vector<TSurface>> registry; | ||
| 795 | |||
| 796 | // The L1 Cache is used for fast texture lookup before checking the overlaps | ||
| 797 | // This avoids calculating size and other stuffs. | ||
| 798 | std::unordered_map<CacheAddr, TSurface> l1_cache; | ||
| 799 | |||
| 800 | /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have | ||
| 801 | /// previously been used. This is to prevent surfaces from being constantly created and | ||
| 802 | /// destroyed when used with different surface parameters. | ||
| 803 | std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve; | ||
| 804 | std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> | ||
| 805 | render_targets; | ||
| 806 | FramebufferTargetInfo depth_buffer; | ||
| 807 | |||
| 808 | std::vector<TSurface> sampled_textures; | ||
| 809 | |||
| 810 | StagingCache staging_cache; | ||
| 811 | std::recursive_mutex mutex; | ||
| 812 | }; | ||
| 813 | |||
| 814 | } // namespace VideoCommon | ||
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp index 82050bd51..f3efa7eb0 100644 --- a/src/video_core/textures/convert.cpp +++ b/src/video_core/textures/convert.cpp | |||
| @@ -62,19 +62,19 @@ static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) { | |||
| 62 | SwapS8Z24ToZ24S8<true>(data, width, height); | 62 | SwapS8Z24ToZ24S8<true>(data, width, height); |
| 63 | } | 63 | } |
| 64 | 64 | ||
| 65 | void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, | 65 | void ConvertFromGuestToHost(u8* in_data, u8* out_data, PixelFormat pixel_format, u32 width, |
| 66 | bool convert_astc, bool convert_s8z24) { | 66 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24) { |
| 67 | if (convert_astc && IsPixelFormatASTC(pixel_format)) { | 67 | if (convert_astc && IsPixelFormatASTC(pixel_format)) { |
| 68 | // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. | 68 | // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. |
| 69 | u32 block_width{}; | 69 | u32 block_width{}; |
| 70 | u32 block_height{}; | 70 | u32 block_height{}; |
| 71 | std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); | 71 | std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); |
| 72 | const std::vector<u8> rgba8_data = | 72 | const std::vector<u8> rgba8_data = Tegra::Texture::ASTC::Decompress( |
| 73 | Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height); | 73 | in_data, width, height, depth, block_width, block_height); |
| 74 | std::copy(rgba8_data.begin(), rgba8_data.end(), data); | 74 | std::copy(rgba8_data.begin(), rgba8_data.end(), out_data); |
| 75 | 75 | ||
| 76 | } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { | 76 | } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { |
| 77 | Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height); | 77 | Tegra::Texture::ConvertS8Z24ToZ24S8(in_data, width, height); |
| 78 | } | 78 | } |
| 79 | } | 79 | } |
| 80 | 80 | ||
| @@ -90,4 +90,4 @@ void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 h | |||
| 90 | } | 90 | } |
| 91 | } | 91 | } |
| 92 | 92 | ||
| 93 | } // namespace Tegra::Texture \ No newline at end of file | 93 | } // namespace Tegra::Texture |
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h index 12542e71c..d5d6c77bb 100644 --- a/src/video_core/textures/convert.h +++ b/src/video_core/textures/convert.h | |||
| @@ -12,10 +12,11 @@ enum class PixelFormat; | |||
| 12 | 12 | ||
| 13 | namespace Tegra::Texture { | 13 | namespace Tegra::Texture { |
| 14 | 14 | ||
| 15 | void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, | 15 | void ConvertFromGuestToHost(u8* in_data, u8* out_data, VideoCore::Surface::PixelFormat pixel_format, |
| 16 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24); | 16 | u32 width, u32 height, u32 depth, bool convert_astc, |
| 17 | bool convert_s8z24); | ||
| 17 | 18 | ||
| 18 | void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, | 19 | void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, |
| 19 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24); | 20 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24); |
| 20 | 21 | ||
| 21 | } // namespace Tegra::Texture \ No newline at end of file | 22 | } // namespace Tegra::Texture |
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 217805386..7e8295944 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -36,10 +36,16 @@ struct alignas(64) SwizzleTable { | |||
| 36 | std::array<std::array<u16, M>, N> values{}; | 36 | std::array<std::array<u16, M>, N> values{}; |
| 37 | }; | 37 | }; |
| 38 | 38 | ||
| 39 | constexpr u32 gob_size_x = 64; | 39 | constexpr u32 gob_size_x_shift = 6; |
| 40 | constexpr u32 gob_size_y = 8; | 40 | constexpr u32 gob_size_y_shift = 3; |
| 41 | constexpr u32 gob_size_z = 1; | 41 | constexpr u32 gob_size_z_shift = 0; |
| 42 | constexpr u32 gob_size = gob_size_x * gob_size_y * gob_size_z; | 42 | constexpr u32 gob_size_shift = gob_size_x_shift + gob_size_y_shift + gob_size_z_shift; |
| 43 | |||
| 44 | constexpr u32 gob_size_x = 1U << gob_size_x_shift; | ||
| 45 | constexpr u32 gob_size_y = 1U << gob_size_y_shift; | ||
| 46 | constexpr u32 gob_size_z = 1U << gob_size_z_shift; | ||
| 47 | constexpr u32 gob_size = 1U << gob_size_shift; | ||
| 48 | |||
| 43 | constexpr u32 fast_swizzle_align = 16; | 49 | constexpr u32 fast_swizzle_align = 16; |
| 44 | 50 | ||
| 45 | constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>(); | 51 | constexpr auto legacy_swizzle_table = SwizzleTable<gob_size_y, gob_size_x, gob_size_z>(); |
| @@ -171,14 +177,16 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool | |||
| 171 | void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, | 177 | void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, |
| 172 | u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, | 178 | u32 out_bytes_per_pixel, u8* const swizzled_data, u8* const unswizzled_data, |
| 173 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { | 179 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing) { |
| 180 | const u32 block_height_size{1U << block_height}; | ||
| 181 | const u32 block_depth_size{1U << block_depth}; | ||
| 174 | if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { | 182 | if (bytes_per_pixel % 3 != 0 && (width * bytes_per_pixel) % fast_swizzle_align == 0) { |
| 175 | SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | 183 | SwizzledData<true>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, |
| 176 | bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, | 184 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, |
| 177 | width_spacing); | 185 | block_depth_size, width_spacing); |
| 178 | } else { | 186 | } else { |
| 179 | SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, | 187 | SwizzledData<false>(swizzled_data, unswizzled_data, unswizzle, width, height, depth, |
| 180 | bytes_per_pixel, out_bytes_per_pixel, block_height, block_depth, | 188 | bytes_per_pixel, out_bytes_per_pixel, block_height_size, |
| 181 | width_spacing); | 189 | block_depth_size, width_spacing); |
| 182 | } | 190 | } |
| 183 | } | 191 | } |
| 184 | 192 | ||
| @@ -248,7 +256,9 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, | |||
| 248 | } | 256 | } |
| 249 | 257 | ||
| 250 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 258 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 251 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) { | 259 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, |
| 260 | u32 block_height_bit) { | ||
| 261 | const u32 block_height = 1U << block_height_bit; | ||
| 252 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / | 262 | const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / |
| 253 | gob_size_x}; | 263 | gob_size_x}; |
| 254 | for (u32 line = 0; line < subrect_height; ++line) { | 264 | for (u32 line = 0; line < subrect_height; ++line) { |
| @@ -269,8 +279,9 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 | |||
| 269 | } | 279 | } |
| 270 | 280 | ||
| 271 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, | 281 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, |
| 272 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, | 282 | u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, |
| 273 | u32 offset_x, u32 offset_y) { | 283 | u32 block_height_bit, u32 offset_x, u32 offset_y) { |
| 284 | const u32 block_height = 1U << block_height_bit; | ||
| 274 | for (u32 line = 0; line < subrect_height; ++line) { | 285 | for (u32 line = 0; line < subrect_height; ++line) { |
| 275 | const u32 y2 = line + offset_y; | 286 | const u32 y2 = line + offset_y; |
| 276 | const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + | 287 | const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + |
| @@ -289,8 +300,9 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
| 289 | } | 300 | } |
| 290 | 301 | ||
| 291 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, | 302 | void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, |
| 292 | const u32 block_height, const std::size_t copy_size, const u8* source_data, | 303 | const u32 block_height_bit, const std::size_t copy_size, const u8* source_data, |
| 293 | u8* swizzle_data) { | 304 | u8* swizzle_data) { |
| 305 | const u32 block_height = 1U << block_height_bit; | ||
| 294 | const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; | 306 | const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; |
| 295 | std::size_t count = 0; | 307 | std::size_t count = 0; |
| 296 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { | 308 | for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { |
| @@ -356,9 +368,9 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat | |||
| 356 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 368 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 357 | u32 block_height, u32 block_depth) { | 369 | u32 block_height, u32 block_depth) { |
| 358 | if (tiled) { | 370 | if (tiled) { |
| 359 | const u32 aligned_width = Common::AlignUp(width * bytes_per_pixel, gob_size_x); | 371 | const u32 aligned_width = Common::AlignBits(width * bytes_per_pixel, gob_size_x_shift); |
| 360 | const u32 aligned_height = Common::AlignUp(height, gob_size_y * block_height); | 372 | const u32 aligned_height = Common::AlignBits(height, gob_size_y_shift + block_height); |
| 361 | const u32 aligned_depth = Common::AlignUp(depth, gob_size_z * block_depth); | 373 | const u32 aligned_depth = Common::AlignBits(depth, gob_size_z_shift + block_depth); |
| 362 | return aligned_width * aligned_height * aligned_depth; | 374 | return aligned_width * aligned_height * aligned_depth; |
| 363 | } else { | 375 | } else { |
| 364 | return width * height * depth * bytes_per_pixel; | 376 | return width * height * depth * bytes_per_pixel; |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index e072d8401..eaec9b5a5 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -12,8 +12,8 @@ namespace Tegra::Texture { | |||
| 12 | 12 | ||
| 13 | // GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents | 13 | // GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents |
| 14 | // an small rect of (64/bytes_per_pixel)X8. | 14 | // an small rect of (64/bytes_per_pixel)X8. |
| 15 | inline std::size_t GetGOBSize() { | 15 | inline std::size_t GetGOBSizeShift() { |
| 16 | return 512; | 16 | return 9; |
| 17 | } | 17 | } |
| 18 | 18 | ||
| 19 | /// Unswizzles a swizzled texture without changing its format. | 19 | /// Unswizzles a swizzled texture without changing its format. |
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 219bfd559..e3be018b9 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h | |||
| @@ -52,9 +52,9 @@ enum class TextureFormat : u32 { | |||
| 52 | DXT45 = 0x26, | 52 | DXT45 = 0x26, |
| 53 | DXN1 = 0x27, | 53 | DXN1 = 0x27, |
| 54 | DXN2 = 0x28, | 54 | DXN2 = 0x28, |
| 55 | Z24S8 = 0x29, | 55 | S8Z24 = 0x29, |
| 56 | X8Z24 = 0x2a, | 56 | X8Z24 = 0x2a, |
| 57 | S8Z24 = 0x2b, | 57 | Z24S8 = 0x2b, |
| 58 | X4V4Z24__COV4R4V = 0x2c, | 58 | X4V4Z24__COV4R4V = 0x2c, |
| 59 | X4V4Z24__COV8R8V = 0x2d, | 59 | X4V4Z24__COV8R8V = 0x2d, |
| 60 | V8Z24__COV4R12V = 0x2e, | 60 | V8Z24__COV4R12V = 0x2e, |
| @@ -172,12 +172,16 @@ struct TICEntry { | |||
| 172 | BitField<26, 1, u32> use_header_opt_control; | 172 | BitField<26, 1, u32> use_header_opt_control; |
| 173 | BitField<27, 1, u32> depth_texture; | 173 | BitField<27, 1, u32> depth_texture; |
| 174 | BitField<28, 4, u32> max_mip_level; | 174 | BitField<28, 4, u32> max_mip_level; |
| 175 | |||
| 176 | BitField<0, 16, u32> buffer_high_width_minus_one; | ||
| 175 | }; | 177 | }; |
| 176 | union { | 178 | union { |
| 177 | BitField<0, 16, u32> width_minus_1; | 179 | BitField<0, 16, u32> width_minus_1; |
| 178 | BitField<22, 1, u32> srgb_conversion; | 180 | BitField<22, 1, u32> srgb_conversion; |
| 179 | BitField<23, 4, TextureType> texture_type; | 181 | BitField<23, 4, TextureType> texture_type; |
| 180 | BitField<29, 3, u32> border_size; | 182 | BitField<29, 3, u32> border_size; |
| 183 | |||
| 184 | BitField<0, 16, u32> buffer_low_width_minus_one; | ||
| 181 | }; | 185 | }; |
| 182 | union { | 186 | union { |
| 183 | BitField<0, 16, u32> height_minus_1; | 187 | BitField<0, 16, u32> height_minus_1; |
| @@ -206,7 +210,10 @@ struct TICEntry { | |||
| 206 | } | 210 | } |
| 207 | 211 | ||
| 208 | u32 Width() const { | 212 | u32 Width() const { |
| 209 | return width_minus_1 + 1; | 213 | if (header_version != TICHeaderVersion::OneDBuffer) { |
| 214 | return width_minus_1 + 1; | ||
| 215 | } | ||
| 216 | return (buffer_high_width_minus_one << 16) | buffer_low_width_minus_one; | ||
| 210 | } | 217 | } |
| 211 | 218 | ||
| 212 | u32 Height() const { | 219 | u32 Height() const { |
| @@ -219,20 +226,17 @@ struct TICEntry { | |||
| 219 | 226 | ||
| 220 | u32 BlockWidth() const { | 227 | u32 BlockWidth() const { |
| 221 | ASSERT(IsTiled()); | 228 | ASSERT(IsTiled()); |
| 222 | // The block height is stored in log2 format. | 229 | return block_width; |
| 223 | return 1 << block_width; | ||
| 224 | } | 230 | } |
| 225 | 231 | ||
| 226 | u32 BlockHeight() const { | 232 | u32 BlockHeight() const { |
| 227 | ASSERT(IsTiled()); | 233 | ASSERT(IsTiled()); |
| 228 | // The block height is stored in log2 format. | 234 | return block_height; |
| 229 | return 1 << block_height; | ||
| 230 | } | 235 | } |
| 231 | 236 | ||
| 232 | u32 BlockDepth() const { | 237 | u32 BlockDepth() const { |
| 233 | ASSERT(IsTiled()); | 238 | ASSERT(IsTiled()); |
| 234 | // The block height is stored in log2 format. | 239 | return block_depth; |
| 235 | return 1 << block_depth; | ||
| 236 | } | 240 | } |
| 237 | 241 | ||
| 238 | bool IsTiled() const { | 242 | bool IsTiled() const { |
| @@ -240,6 +244,15 @@ struct TICEntry { | |||
| 240 | header_version == TICHeaderVersion::BlockLinearColorKey; | 244 | header_version == TICHeaderVersion::BlockLinearColorKey; |
| 241 | } | 245 | } |
| 242 | 246 | ||
| 247 | bool IsLineal() const { | ||
| 248 | return header_version == TICHeaderVersion::Pitch || | ||
| 249 | header_version == TICHeaderVersion::PitchColorKey; | ||
| 250 | } | ||
| 251 | |||
| 252 | bool IsBuffer() const { | ||
| 253 | return header_version == TICHeaderVersion::OneDBuffer; | ||
| 254 | } | ||
| 255 | |||
| 243 | bool IsSrgbConversionEnabled() const { | 256 | bool IsSrgbConversionEnabled() const { |
| 244 | return srgb_conversion != 0; | 257 | return srgb_conversion != 0; |
| 245 | } | 258 | } |
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 47e46f574..ae21f4753 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -750,6 +750,9 @@ void GMainWindow::OnDisplayTitleBars(bool show) { | |||
| 750 | QStringList GMainWindow::GetUnsupportedGLExtensions() { | 750 | QStringList GMainWindow::GetUnsupportedGLExtensions() { |
| 751 | QStringList unsupported_ext; | 751 | QStringList unsupported_ext; |
| 752 | 752 | ||
| 753 | if (!GLAD_GL_ARB_buffer_storage) { | ||
| 754 | unsupported_ext.append(QStringLiteral("ARB_buffer_storage")); | ||
| 755 | } | ||
| 753 | if (!GLAD_GL_ARB_direct_state_access) { | 756 | if (!GLAD_GL_ARB_direct_state_access) { |
| 754 | unsupported_ext.append(QStringLiteral("ARB_direct_state_access")); | 757 | unsupported_ext.append(QStringLiteral("ARB_direct_state_access")); |
| 755 | } | 758 | } |
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp index e2d3df180..f91b071bf 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp | |||
| @@ -52,6 +52,10 @@ private: | |||
| 52 | bool EmuWindow_SDL2_GL::SupportsRequiredGLExtensions() { | 52 | bool EmuWindow_SDL2_GL::SupportsRequiredGLExtensions() { |
| 53 | std::vector<std::string> unsupported_ext; | 53 | std::vector<std::string> unsupported_ext; |
| 54 | 54 | ||
| 55 | if (!GLAD_GL_ARB_buffer_storage) | ||
| 56 | unsupported_ext.push_back("ARB_buffer_storage"); | ||
| 57 | if (!GLAD_GL_ARB_direct_state_access) | ||
| 58 | unsupported_ext.push_back("ARB_direct_state_access"); | ||
| 55 | if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev) | 59 | if (!GLAD_GL_ARB_vertex_type_10f_11f_11f_rev) |
| 56 | unsupported_ext.push_back("ARB_vertex_type_10f_11f_11f_rev"); | 60 | unsupported_ext.push_back("ARB_vertex_type_10f_11f_11f_rev"); |
| 57 | if (!GLAD_GL_ARB_texture_mirror_clamp_to_edge) | 61 | if (!GLAD_GL_ARB_texture_mirror_clamp_to_edge) |