summaryrefslogtreecommitdiff
path: root/src/video_core/texture_cache
diff options
context:
space:
mode:
authorGravatar Zach Hilman2019-07-05 13:39:13 -0400
committerGravatar GitHub2019-07-05 13:39:13 -0400
commit772c86a260eb446b0fe4232b0a50666511bef25c (patch)
tree013d92268c06454c93565c83eff2b79b56a00839 /src/video_core/texture_cache
parentMerge pull request #2669 from FearlessTobi/move-cpujit-setting (diff)
parenttexture_cache: Address Feedback (diff)
downloadyuzu-772c86a260eb446b0fe4232b0a50666511bef25c.tar.gz
yuzu-772c86a260eb446b0fe4232b0a50666511bef25c.tar.xz
yuzu-772c86a260eb446b0fe4232b0a50666511bef25c.zip
Merge pull request #2601 from FernandoS27/texture_cache
Implement a new Texture Cache
Diffstat (limited to 'src/video_core/texture_cache')
-rw-r--r--src/video_core/texture_cache/copy_params.h36
-rw-r--r--src/video_core/texture_cache/surface_base.cpp300
-rw-r--r--src/video_core/texture_cache/surface_base.h317
-rw-r--r--src/video_core/texture_cache/surface_params.cpp334
-rw-r--r--src/video_core/texture_cache/surface_params.h286
-rw-r--r--src/video_core/texture_cache/surface_view.cpp23
-rw-r--r--src/video_core/texture_cache/surface_view.h67
-rw-r--r--src/video_core/texture_cache/texture_cache.h814
8 files changed, 2177 insertions, 0 deletions
diff --git a/src/video_core/texture_cache/copy_params.h b/src/video_core/texture_cache/copy_params.h
new file mode 100644
index 000000000..9c21a0649
--- /dev/null
+++ b/src/video_core/texture_cache/copy_params.h
@@ -0,0 +1,36 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace VideoCommon {
10
/// Parameters of a single image copy between two (sub)surfaces: a source and a
/// destination origin (x/y/z texels plus mipmap level) and the extent copied.
struct CopyParams {
    constexpr CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y,
                         u32 dest_z, u32 source_level, u32 dest_level, u32 width, u32 height,
                         u32 depth)
        : source_x{source_x}, source_y{source_y}, source_z{source_z}, dest_x{dest_x},
          dest_y{dest_y}, dest_z{dest_z}, source_level{source_level},
          dest_level{dest_level}, width{width}, height{height}, depth{depth} {}

    // Shorthand for a full-level copy: both origins are (0, 0, 0) and the same
    // mipmap level is used on the source and destination side.
    constexpr CopyParams(u32 width, u32 height, u32 depth, u32 level)
        : source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level},
          dest_level{level}, width{width}, height{height}, depth{depth} {}

    u32 source_x;
    u32 source_y;
    u32 source_z;
    u32 dest_x;
    u32 dest_y;
    u32 dest_z;
    u32 source_level;
    u32 dest_level;
    u32 width;
    u32 height;
    u32 depth;
};
35
36} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
new file mode 100644
index 000000000..7a0fdb19b
--- /dev/null
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -0,0 +1,300 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/microprofile.h"
8#include "video_core/memory_manager.h"
9#include "video_core/texture_cache/surface_base.h"
10#include "video_core/texture_cache/surface_params.h"
11#include "video_core/textures/convert.h"
12
13namespace VideoCommon {
14
// Profiling scopes for guest->host texture loads and host->guest flushes.
MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128));
MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128));

using Tegra::Texture::ConvertFromGuestToHost;
using VideoCore::MortonSwizzleMode;
using VideoCore::Surface::SurfaceCompression;

StagingCache::StagingCache() = default;

StagingCache::~StagingCache() = default;
25
// Precomputes the per-level guest sizes/offsets and the total guest and host
// memory footprint of the surface described by params.
SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params)
    : params{params}, mipmap_sizes(params.num_levels),
      mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{
                                                                 params.GetHostSizeInBytes()} {
    std::size_t offset = 0;
    for (u32 level = 0; level < params.num_levels; ++level) {
        const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};
        mipmap_sizes[level] = mipmap_size;
        mipmap_offsets[level] = offset;
        offset += mipmap_size;
    }
    // All mip levels of one layer are laid out back to back in guest memory.
    layer_size = offset;
    if (params.is_layered) {
        if (params.is_tiled) {
            // Tiled layers are padded to the block-linear alignment.
            layer_size =
                SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth);
        }
        guest_memory_size = layer_size * params.depth;
    } else {
        guest_memory_size = layer_size;
    }
}
48
49MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const {
50 const u32 src_bpp{params.GetBytesPerPixel()};
51 const u32 dst_bpp{rhs.GetBytesPerPixel()};
52 const bool ib1 = params.IsBuffer();
53 const bool ib2 = rhs.IsBuffer();
54 if (std::tie(src_bpp, params.is_tiled, ib1) == std::tie(dst_bpp, rhs.is_tiled, ib2)) {
55 const bool cb1 = params.IsCompressed();
56 const bool cb2 = rhs.IsCompressed();
57 if (cb1 == cb2) {
58 return MatchTopologyResult::FullMatch;
59 }
60 return MatchTopologyResult::CompressUnmatch;
61 }
62 return MatchTopologyResult::None;
63}
64
65MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const {
66 // Buffer surface Check
67 if (params.IsBuffer()) {
68 const std::size_t wd1 = params.width * params.GetBytesPerPixel();
69 const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel();
70 if (wd1 == wd2) {
71 return MatchStructureResult::FullMatch;
72 }
73 return MatchStructureResult::None;
74 }
75
76 // Linear Surface check
77 if (!params.is_tiled) {
78 if (std::tie(params.width, params.height, params.pitch) ==
79 std::tie(rhs.width, rhs.height, rhs.pitch)) {
80 return MatchStructureResult::FullMatch;
81 }
82 return MatchStructureResult::None;
83 }
84
85 // Tiled Surface check
86 if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth,
87 params.tile_width_spacing, params.num_levels) ==
88 std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth,
89 rhs.tile_width_spacing, rhs.num_levels)) {
90 if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) {
91 return MatchStructureResult::FullMatch;
92 }
93 const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format,
94 rhs.pixel_format);
95 const u32 hs =
96 SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format);
97 const u32 w1 = params.GetBlockAlignedWidth();
98 if (std::tie(w1, params.height) == std::tie(ws, hs)) {
99 return MatchStructureResult::SemiMatch;
100 }
101 }
102 return MatchStructureResult::None;
103}
104
// Maps a GPU address inside this surface to the (layer, level) pair it points
// at. Returns an empty optional when the address lies below the surface start
// or does not land exactly on a mipmap boundary.
std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
    const GPUVAddr candidate_gpu_addr) const {
    if (gpu_addr == candidate_gpu_addr) {
        return {{0, 0}};
    }
    if (candidate_gpu_addr < gpu_addr) {
        return {};
    }
    const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
    const auto layer{static_cast<u32>(relative_address / layer_size)};
    const GPUVAddr mipmap_address = relative_address - layer_size * layer;
    // mipmap_offsets is built in ascending order by the constructor, so a
    // binary search for an exact hit is valid here.
    const auto mipmap_it =
        Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
    if (mipmap_it == mipmap_offsets.end()) {
        return {};
    }
    const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))};
    return std::make_pair(layer, level);
}
124
125std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const {
126 const u32 layers{params.depth};
127 const u32 mipmaps{params.num_levels};
128 std::vector<CopyParams> result;
129 result.reserve(static_cast<std::size_t>(layers) * static_cast<std::size_t>(mipmaps));
130
131 for (u32 layer = 0; layer < layers; layer++) {
132 for (u32 level = 0; level < mipmaps; level++) {
133 const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
134 const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
135 result.emplace_back(width, height, layer, level);
136 }
137 }
138 return result;
139}
140
141std::vector<CopyParams> SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const {
142 const u32 mipmaps{params.num_levels};
143 std::vector<CopyParams> result;
144 result.reserve(mipmaps);
145
146 for (u32 level = 0; level < mipmaps; level++) {
147 const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
148 const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
149 const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))};
150 result.emplace_back(width, height, depth, level);
151 }
152 return result;
153}
154
// (De)swizzles one mipmap level between guest (morton/tiled) memory and a
// linear host buffer; `mode` selects the direction. Layered surfaces are
// processed one layer at a time with separate guest/host strides.
void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params,
                                  u8* buffer, u32 level) {
    const u32 width{params.GetMipWidth(level)};
    const u32 height{params.GetMipHeight(level)};
    const u32 block_height{params.GetMipBlockHeight(level)};
    const u32 block_depth{params.GetMipBlockDepth(level)};

    std::size_t guest_offset{mipmap_offsets[level]};
    if (params.is_layered) {
        std::size_t host_offset{0};
        // Guest layers advance by the whole (aligned) layer size; host layers
        // advance by the linear size of this level only.
        const std::size_t guest_stride = layer_size;
        const std::size_t host_stride = params.GetHostLayerSize(level);
        for (u32 layer = 0; layer < params.depth; ++layer) {
            MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, 1,
                          params.tile_width_spacing, buffer + host_offset, memory + guest_offset);
            guest_offset += guest_stride;
            host_offset += host_stride;
        }
    } else {
        MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth,
                      params.GetMipDepth(level), params.tile_width_spacing, buffer,
                      memory + guest_offset);
    }
}
179
// Reads this surface from guest memory into staging buffer 0, deswizzling
// tiled data and running format conversion for formats the host cannot
// consume directly.
void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
                                 StagingCache& staging_cache) {
    MICROPROFILE_SCOPE(GPU_Load_Texture);
    auto& staging_buffer = staging_cache.GetBuffer(0);
    u8* host_ptr;
    is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size);

    // Handle continuity
    if (is_continuous) {
        // Use physical memory directly
        host_ptr = memory_manager.GetPointer(gpu_addr);
        if (!host_ptr) {
            return;
        }
    } else {
        // Use an extra temporary buffer and gather the scattered pages into it
        auto& tmp_buffer = staging_cache.GetBuffer(1);
        tmp_buffer.resize(guest_memory_size);
        host_ptr = tmp_buffer.data();
        memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
    }

    if (params.is_tiled) {
        // NOTE(review): this asserts block_width == 0 (the value is a log2
        // exponent, so 0 is the default block width) — confirm the swizzler
        // indeed only supports that case.
        ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
                   params.block_width, static_cast<u32>(params.target));
        for (u32 level = 0; level < params.num_levels; ++level) {
            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)};
            SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
                        staging_buffer.data() + host_offset, level);
        }
    } else {
        ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented");
        const u32 bpp{params.GetBytesPerPixel()};
        const u32 block_width{params.GetDefaultBlockWidth()};
        const u32 block_height{params.GetDefaultBlockHeight()};
        const u32 width{(params.width + block_width - 1) / block_width};
        const u32 height{(params.height + block_height - 1) / block_height};
        const u32 copy_size{width * bpp};
        if (params.pitch == copy_size) {
            // Rows are tightly packed; a single bulk copy suffices.
            std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes());
        } else {
            // Copy row by row, skipping the pitch padding.
            const u8* start{host_ptr};
            u8* write_to{staging_buffer.data()};
            for (u32 h = height; h > 0; --h) {
                std::memcpy(write_to, start, copy_size);
                start += params.pitch;
                write_to += copy_size;
            }
        }
    }

    auto compression_type = params.GetCompressionType();
    if (compression_type == SurfaceCompression::None ||
        compression_type == SurfaceCompression::Compressed)
        return;

    // Convert remaining formats in-place inside the staging buffer, iterating
    // from the last level backwards so that writes to (possibly larger)
    // converted offsets never clobber guest data that is still unread.
    for (u32 level_up = params.num_levels; level_up > 0; --level_up) {
        const u32 level = level_up - 1;
        const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level)};
        const std::size_t out_host_offset = compression_type == SurfaceCompression::Rearranged
                                                ? in_host_offset
                                                : params.GetConvertedMipmapOffset(level);
        u8* in_buffer = staging_buffer.data() + in_host_offset;
        u8* out_buffer = staging_buffer.data() + out_host_offset;
        ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format,
                               params.GetMipWidth(level), params.GetMipHeight(level),
                               params.GetMipDepth(level), true, true);
    }
}
249
// Writes staging buffer 0 back into guest memory, re-swizzling tiled
// surfaces; the inverse of LoadBuffer.
void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
                                  StagingCache& staging_cache) {
    MICROPROFILE_SCOPE(GPU_Flush_Texture);
    auto& staging_buffer = staging_cache.GetBuffer(0);
    u8* host_ptr;

    // Handle continuity
    if (is_continuous) {
        // Use physical memory directly
        host_ptr = memory_manager.GetPointer(gpu_addr);
        if (!host_ptr) {
            return;
        }
    } else {
        // Use an extra temporary buffer; its contents are scattered back to
        // the guest's pages at the end of this function.
        auto& tmp_buffer = staging_cache.GetBuffer(1);
        tmp_buffer.resize(guest_memory_size);
        host_ptr = tmp_buffer.data();
    }

    if (params.is_tiled) {
        ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
        for (u32 level = 0; level < params.num_levels; ++level) {
            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)};
            SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
                        staging_buffer.data() + host_offset, level);
        }
    } else {
        // Linear flush only supports single-level 2D surfaces.
        ASSERT(params.target == SurfaceTarget::Texture2D);
        ASSERT(params.num_levels == 1);

        const u32 bpp{params.GetBytesPerPixel()};
        const u32 copy_size{params.width * bpp};
        if (params.pitch == copy_size) {
            std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
        } else {
            // Restore the pitch padding row by row.
            u8* start{host_ptr};
            const u8* read_to{staging_buffer.data()};
            for (u32 h = params.height; h > 0; --h) {
                std::memcpy(start, read_to, copy_size);
                start += params.pitch;
                read_to += copy_size;
            }
        }
    }
    if (!is_continuous) {
        memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
    }
}
299
300} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
new file mode 100644
index 000000000..8ba386a8a
--- /dev/null
+++ b/src/video_core/texture_cache/surface_base.h
@@ -0,0 +1,317 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <unordered_map>
9#include <vector>
10
11#include "common/assert.h"
12#include "common/binary_find.h"
13#include "common/common_types.h"
14#include "video_core/gpu.h"
15#include "video_core/morton.h"
16#include "video_core/texture_cache/copy_params.h"
17#include "video_core/texture_cache/surface_params.h"
18#include "video_core/texture_cache/surface_view.h"
19
20namespace Tegra {
21class MemoryManager;
22}
23
24namespace VideoCommon {
25
26using VideoCore::MortonSwizzleMode;
27using VideoCore::Surface::SurfaceTarget;
28
/// Result of comparing the structural layout (dimensions, pitch, mip count)
/// of two surfaces; see SurfaceBaseImpl::MatchesStructure.
enum class MatchStructureResult : u32 {
    FullMatch = 0,
    SemiMatch = 1,
    None = 2,
};

/// Result of comparing the memory topology (bytes per pixel, tiling,
/// buffer-ness) of two surfaces; see SurfaceBaseImpl::MatchesTopology.
enum class MatchTopologyResult : u32 {
    FullMatch = 0,
    CompressUnmatch = 1,
    None = 2,
};
40
/// Owns the scratch buffers used while uploading and downloading surface
/// data. Buffer 0 holds the linear host copy; buffer 1 is used as a gather/
/// scatter area for non-continuous guest memory (see SurfaceBaseImpl).
class StagingCache {
public:
    explicit StagingCache();
    ~StagingCache();

    std::vector<u8>& GetBuffer(std::size_t index) {
        return staging_buffer[index];
    }

    const std::vector<u8>& GetBuffer(std::size_t index) const {
        return staging_buffer[index];
    }

    /// Sets the number of staging buffers available.
    void SetSize(std::size_t size) {
        staging_buffer.resize(size);
    }

private:
    std::vector<std::vector<u8>> staging_buffer;
};
61
/// Backend-agnostic core of a cached surface: tracks its guest (GPU/CPU/cache)
/// addresses, guest and host sizes, per-level mipmap layout, and provides the
/// layout/topology comparisons the texture cache uses for matching.
class SurfaceBaseImpl {
public:
    /// Reads the surface from guest memory into staging buffer 0 (deswizzle +
    /// format conversion as needed).
    void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);

    /// Writes staging buffer 0 back into guest memory (re-swizzle).
    void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache);

    GPUVAddr GetGpuAddr() const {
        return gpu_addr;
    }

    /// True when [start, end) intersects this surface's cache address range.
    bool Overlaps(const CacheAddr start, const CacheAddr end) const {
        return (cache_addr < end) && (cache_addr_end > start);
    }

    /// True when [other_start, other_end] lies fully within this surface.
    bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) {
        const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size;
        return (gpu_addr <= other_start && other_end <= gpu_addr_end);
    }

    // Use only when recycling a surface
    void SetGpuAddr(const GPUVAddr new_addr) {
        gpu_addr = new_addr;
    }

    VAddr GetCpuAddr() const {
        return cpu_addr;
    }

    void SetCpuAddr(const VAddr new_addr) {
        cpu_addr = new_addr;
    }

    CacheAddr GetCacheAddr() const {
        return cache_addr;
    }

    CacheAddr GetCacheAddrEnd() const {
        return cache_addr_end;
    }

    /// Updates the cache address and derives the end address from the guest size.
    void SetCacheAddr(const CacheAddr new_addr) {
        cache_addr = new_addr;
        cache_addr_end = new_addr + guest_memory_size;
    }

    const SurfaceParams& GetSurfaceParams() const {
        return params;
    }

    /// Size of the surface in guest memory, including tiling alignment.
    std::size_t GetSizeInBytes() const {
        return guest_memory_size;
    }

    /// Size of the linear host representation.
    std::size_t GetHostSizeInBytes() const {
        return host_memory_size;
    }

    std::size_t GetMipmapSize(const u32 level) const {
        return mipmap_sizes[level];
    }

    void MarkAsContinuous(const bool is_continuous) {
        this->is_continuous = is_continuous;
    }

    bool IsContinuous() const {
        return is_continuous;
    }

    bool IsLinear() const {
        return !params.is_tiled;
    }

    bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const {
        return params.pixel_format == pixel_format;
    }

    VideoCore::Surface::PixelFormat GetFormat() const {
        return params.pixel_format;
    }

    bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const {
        return params.target == target;
    }

    MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const;

    MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const;

    /// True when rhs at other_gpu_addr is an exact single-level 2D alias of
    /// this surface.
    bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const {
        return std::tie(gpu_addr, params.target, params.num_levels) ==
                   std::tie(other_gpu_addr, rhs.target, rhs.num_levels) &&
               params.target == SurfaceTarget::Texture2D && params.num_levels == 1;
    }

    /// Maps a GPU address within the surface to its (layer, level) pair.
    std::optional<std::pair<u32, u32>> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const;

    /// Produces the per-subresource copies needed to transfer this surface.
    std::vector<CopyParams> BreakDown(const SurfaceParams& in_params) const {
        return params.is_layered ? BreakDownLayered(in_params) : BreakDownNonLayered(in_params);
    }

protected:
    explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params);
    ~SurfaceBaseImpl() = default;

    virtual void DecorateSurfaceName() = 0;

    const SurfaceParams params;
    std::size_t layer_size;        // guest bytes per layer (aligned when tiled)
    std::size_t guest_memory_size; // total guest bytes (layer_size * depth if layered)
    const std::size_t host_memory_size;
    GPUVAddr gpu_addr{};
    CacheAddr cache_addr{};
    CacheAddr cache_addr_end{};
    VAddr cpu_addr{};
    bool is_continuous{};

    std::vector<std::size_t> mipmap_sizes;   // guest size of each level
    std::vector<std::size_t> mipmap_offsets; // guest offset of each level within a layer

private:
    void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer,
                     u32 level);

    std::vector<CopyParams> BreakDownLayered(const SurfaceParams& in_params) const;

    std::vector<CopyParams> BreakDownNonLayered(const SurfaceParams& in_params) const;
};
190
/// CRTP-style surface base parameterized on the backend view type. Adds
/// modification/registration state tracking and the view-emplacement logic
/// shared by all renderer backends.
template <typename TView>
class SurfaceBase : public SurfaceBaseImpl {
public:
    virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0;

    virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0;

    // A render target stays flagged as modified regardless of is_modified_.
    void MarkAsModified(const bool is_modified_, const u64 tick) {
        is_modified = is_modified_ || is_target;
        modification_tick = tick;
    }

    void MarkAsRenderTarget(const bool is_target) {
        this->is_target = is_target;
    }

    void MarkAsPicked(const bool is_picked) {
        this->is_picked = is_picked;
    }

    bool IsModified() const {
        return is_modified;
    }

    bool IsProtected() const {
        // Only 3D Slices are to be protected
        return is_target && params.block_depth > 0;
    }

    bool IsRenderTarget() const {
        return is_target;
    }

    bool IsRegistered() const {
        return is_registered;
    }

    bool IsPicked() const {
        return is_picked;
    }

    void MarkAsRegistered(bool is_reg) {
        is_registered = is_reg;
    }

    u64 GetModificationTick() const {
        return modification_tick;
    }

    /// View spanning the whole surface; collapses to one layer when the
    /// overview target is not layered but this surface is.
    TView EmplaceOverview(const SurfaceParams& overview_params) {
        const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth};
        return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
    }

    /// Fallback for views whose size does not match a single mipmap exactly:
    /// spans either multiple layers of level 0 or multiple levels of one layer.
    std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
                                              const GPUVAddr view_addr,
                                              const std::size_t candidate_size, const u32 mipmap,
                                              const u32 layer) {
        const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)};
        if (!layer_mipmap) {
            return {};
        }
        const u32 end_layer{layer_mipmap->first};
        const u32 end_mipmap{layer_mipmap->second};
        if (layer != end_layer) {
            if (mipmap == 0 && end_mipmap == 0) {
                return GetView(ViewParams(view_params.target, layer, end_layer - layer + 1, 0, 1));
            }
            return {};
        } else {
            return GetView(
                ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap + 1));
        }
    }

    /// Creates (or reuses) a view of this surface starting at view_addr, or
    /// returns an empty optional when no valid view exists. 3D targets are
    /// not viewable.
    std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
                                     const std::size_t candidate_size) {
        if (params.target == SurfaceTarget::Texture3D ||
            (params.num_levels == 1 && !params.is_layered) ||
            view_params.target == SurfaceTarget::Texture3D) {
            return {};
        }
        const auto layer_mipmap{GetLayerMipmap(view_addr)};
        if (!layer_mipmap) {
            return {};
        }
        const u32 layer{layer_mipmap->first};
        const u32 mipmap{layer_mipmap->second};
        if (GetMipmapSize(mipmap) != candidate_size) {
            return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer);
        }
        return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1));
    }

    TView GetMainView() const {
        return main_view;
    }

protected:
    explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params)
        : SurfaceBaseImpl(gpu_addr, params) {}

    ~SurfaceBase() = default;

    virtual TView CreateView(const ViewParams& view_key) = 0;

    TView main_view;
    std::unordered_map<ViewParams, TView> views;

private:
    /// Returns the cached view for key, creating it on a cache miss.
    TView GetView(const ViewParams& key) {
        const auto [entry, is_cache_miss] = views.try_emplace(key);
        auto& view{entry->second};
        if (is_cache_miss) {
            view = CreateView(key);
        }
        return view;
    }

    bool is_modified{};
    bool is_target{};
    bool is_registered{};
    bool is_picked{};
    u64 modification_tick{};
};
316
317} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
new file mode 100644
index 000000000..9c56e2b4f
--- /dev/null
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -0,0 +1,334 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <map>
6
7#include "common/alignment.h"
8#include "common/bit_util.h"
9#include "core/core.h"
10#include "video_core/engines/shader_bytecode.h"
11#include "video_core/surface.h"
12#include "video_core/texture_cache/surface_params.h"
13
14namespace VideoCommon {
15
16using VideoCore::Surface::ComponentTypeFromDepthFormat;
17using VideoCore::Surface::ComponentTypeFromRenderTarget;
18using VideoCore::Surface::ComponentTypeFromTexture;
19using VideoCore::Surface::PixelFormat;
20using VideoCore::Surface::PixelFormatFromDepthFormat;
21using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
22using VideoCore::Surface::PixelFormatFromTextureFormat;
23using VideoCore::Surface::SurfaceTarget;
24using VideoCore::Surface::SurfaceTargetFromTextureType;
25using VideoCore::Surface::SurfaceType;
26
27SurfaceTarget TextureType2SurfaceTarget(Tegra::Shader::TextureType type, bool is_array) {
28 switch (type) {
29 case Tegra::Shader::TextureType::Texture1D: {
30 if (is_array)
31 return SurfaceTarget::Texture1DArray;
32 else
33 return SurfaceTarget::Texture1D;
34 }
35 case Tegra::Shader::TextureType::Texture2D: {
36 if (is_array)
37 return SurfaceTarget::Texture2DArray;
38 else
39 return SurfaceTarget::Texture2D;
40 }
41 case Tegra::Shader::TextureType::Texture3D: {
42 ASSERT(!is_array);
43 return SurfaceTarget::Texture3D;
44 }
45 case Tegra::Shader::TextureType::TextureCube: {
46 if (is_array)
47 return SurfaceTarget::TextureCubeArray;
48 else
49 return SurfaceTarget::TextureCubemap;
50 }
51 default: {
52 UNREACHABLE();
53 return SurfaceTarget::Texture2D;
54 }
55 }
56}
57
58namespace {
59constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
60 return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile);
61}
62} // Anonymous namespace
63
64SurfaceParams SurfaceParams::CreateForTexture(Core::System& system,
65 const Tegra::Texture::FullTextureInfo& config,
66 const VideoCommon::Shader::Sampler& entry) {
67 SurfaceParams params;
68 params.is_tiled = config.tic.IsTiled();
69 params.srgb_conversion = config.tic.IsSrgbConversionEnabled();
70 params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0,
71 params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
72 params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0,
73 params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1;
74 params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
75 params.srgb_conversion);
76 params.type = GetFormatType(params.pixel_format);
77 if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) {
78 switch (params.pixel_format) {
79 case PixelFormat::R16U:
80 case PixelFormat::R16F: {
81 params.pixel_format = PixelFormat::Z16;
82 break;
83 }
84 case PixelFormat::R32F: {
85 params.pixel_format = PixelFormat::Z32F;
86 break;
87 }
88 default: {
89 UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}",
90 static_cast<u32>(params.pixel_format));
91 }
92 }
93 params.type = GetFormatType(params.pixel_format);
94 }
95 params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
96 params.type = GetFormatType(params.pixel_format);
97 // TODO: on 1DBuffer we should use the tic info.
98 if (!config.tic.IsBuffer()) {
99 params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray());
100 params.width = config.tic.Width();
101 params.height = config.tic.Height();
102 params.depth = config.tic.Depth();
103 params.pitch = params.is_tiled ? 0 : config.tic.Pitch();
104 if (params.target == SurfaceTarget::TextureCubemap ||
105 params.target == SurfaceTarget::TextureCubeArray) {
106 params.depth *= 6;
107 }
108 params.num_levels = config.tic.max_mip_level + 1;
109 params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
110 params.is_layered = params.IsLayered();
111 } else {
112 params.target = SurfaceTarget::TextureBuffer;
113 params.width = config.tic.Width();
114 params.pitch = params.width * params.GetBytesPerPixel();
115 params.height = 1;
116 params.depth = 1;
117 params.num_levels = 1;
118 params.emulated_levels = 1;
119 params.is_layered = false;
120 }
121 return params;
122}
123
// Builds surface parameters for a depth (zeta) buffer.
SurfaceParams SurfaceParams::CreateForDepthBuffer(
    Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
    u32 block_width, u32 block_height, u32 block_depth,
    Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
    SurfaceParams params;
    params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
    params.srgb_conversion = false;
    // NOTE(review): block dimensions appear to be log2 exponents clamped to a
    // maximum of 5 — confirm against the Maxwell register definitions.
    params.block_width = std::min(block_width, 5U);
    params.block_height = std::min(block_height, 5U);
    params.block_depth = std::min(block_depth, 5U);
    params.tile_width_spacing = 1;
    params.pixel_format = PixelFormatFromDepthFormat(format);
    params.component_type = ComponentTypeFromDepthFormat(format);
    params.type = GetFormatType(params.pixel_format);
    params.width = zeta_width;
    params.height = zeta_height;
    params.target = SurfaceTarget::Texture2D;
    params.depth = 1;
    params.pitch = 0;
    params.num_levels = 1;
    params.emulated_levels = 1;
    params.is_layered = false;
    return params;
}
148
// Builds surface parameters for color render target `index` from the current
// Maxwell3D register state.
SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) {
    const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
    SurfaceParams params;
    params.is_tiled =
        config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
    params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
                             config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
    params.block_width = config.memory_layout.block_width;
    params.block_height = config.memory_layout.block_height;
    params.block_depth = config.memory_layout.block_depth;
    params.tile_width_spacing = 1;
    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
    params.component_type = ComponentTypeFromRenderTarget(config.format);
    params.type = GetFormatType(params.pixel_format);
    if (params.is_tiled) {
        params.pitch = 0;
        params.width = config.width;
    } else {
        // For pitch-linear targets the width register holds the pitch in
        // bytes; derive the width in pixels from it.
        const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT;
        params.pitch = config.width;
        params.width = params.pitch / bpp;
    }
    params.height = config.height;
    params.depth = 1;
    params.target = SurfaceTarget::Texture2D;
    params.num_levels = 1;
    params.emulated_levels = 1;
    params.is_layered = false;
    return params;
}
179
180SurfaceParams SurfaceParams::CreateForFermiCopySurface(
181 const Tegra::Engines::Fermi2D::Regs::Surface& config) {
182 SurfaceParams params{};
183 params.is_tiled = !config.linear;
184 params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
185 config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
186 params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 5U) : 0,
187 params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 5U) : 0,
188 params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0,
189 params.tile_width_spacing = 1;
190 params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
191 params.component_type = ComponentTypeFromRenderTarget(config.format);
192 params.type = GetFormatType(params.pixel_format);
193 params.width = config.width;
194 params.height = config.height;
195 params.pitch = config.pitch;
196 // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters
197 params.target = SurfaceTarget::Texture2D;
198 params.depth = 1;
199 params.num_levels = 1;
200 params.emulated_levels = 1;
201 params.is_layered = params.IsLayered();
202 return params;
203}
204
205bool SurfaceParams::IsLayered() const {
206 switch (target) {
207 case SurfaceTarget::Texture1DArray:
208 case SurfaceTarget::Texture2DArray:
209 case SurfaceTarget::TextureCubemap:
210 case SurfaceTarget::TextureCubeArray:
211 return true;
212 default:
213 return false;
214 }
215}
216
// Auto block resizing algorithm from:
// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
// Shrinks the block height exponent for smaller mip levels so that blocks do
// not dwarf the level itself.
u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
    if (level == 0) {
        return this->block_height;
    }

    const u32 height_new{GetMipHeight(level)};
    const u32 default_block_height{GetDefaultBlockHeight()};
    const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height};
    const u32 block_height_new = Common::Log2Ceil32(blocks_in_y);
    // Clamp into [3, 7] and rebase so the returned exponent lies in [0, 4].
    return std::clamp(block_height_new, 3U, 7U) - 3U;
}
230
231u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
232 if (level == 0) {
233 return this->block_depth;
234 }
235 if (is_layered) {
236 return 0;
237 }
238
239 const u32 depth_new{GetMipDepth(level)};
240 const u32 block_depth_new = Common::Log2Ceil32(depth_new);
241 if (block_depth_new > 4) {
242 return 5 - (GetMipBlockHeight(level) >= 2);
243 }
244 return block_depth_new;
245}
246
247std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
248 std::size_t offset = 0;
249 for (u32 i = 0; i < level; i++) {
250 offset += GetInnerMipmapMemorySize(i, false, false);
251 }
252 return offset;
253}
254
255std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const {
256 std::size_t offset = 0;
257 for (u32 i = 0; i < level; i++) {
258 offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
259 }
260 return offset;
261}
262
263std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const {
264 std::size_t offset = 0;
265 for (u32 i = 0; i < level; i++) {
266 offset += GetConvertedMipmapSize(i);
267 }
268 return offset;
269}
270
271std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const {
272 constexpr std::size_t rgba8_bpp = 4ULL;
273 const std::size_t width_t = GetMipWidth(level);
274 const std::size_t height_t = GetMipHeight(level);
275 const std::size_t depth_t = is_layered ? depth : GetMipDepth(level);
276 return width_t * height_t * depth_t * rgba8_bpp;
277}
278
279std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const {
280 std::size_t size = 0;
281 for (u32 level = 0; level < num_levels; ++level) {
282 size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed);
283 }
284 if (is_tiled && is_layered) {
285 return Common::AlignBits(size,
286 Tegra::Texture::GetGOBSizeShift() + block_height + block_depth);
287 }
288 return size;
289}
290
291std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
292 bool uncompressed) const {
293 const bool tiled{as_host_size ? false : is_tiled};
294 const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
295 const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
296 const u32 depth{is_layered ? 1U : GetMipDepth(level)};
297 return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth,
298 GetMipBlockHeight(level), GetMipBlockDepth(level));
299}
300
301bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
302 return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width,
303 height, depth, pitch, num_levels, pixel_format, component_type, type, target) ==
304 std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth,
305 rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch,
306 rhs.num_levels, rhs.pixel_format, rhs.component_type, rhs.type, rhs.target);
307}
308
309std::string SurfaceParams::TargetName() const {
310 switch (target) {
311 case SurfaceTarget::Texture1D:
312 return "1D";
313 case SurfaceTarget::TextureBuffer:
314 return "TexBuffer";
315 case SurfaceTarget::Texture2D:
316 return "2D";
317 case SurfaceTarget::Texture3D:
318 return "3D";
319 case SurfaceTarget::Texture1DArray:
320 return "1DArray";
321 case SurfaceTarget::Texture2DArray:
322 return "2DArray";
323 case SurfaceTarget::TextureCubemap:
324 return "Cube";
325 case SurfaceTarget::TextureCubeArray:
326 return "CubeArray";
327 default:
328 LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
329 UNREACHABLE();
330 return fmt::format("TUK({})", static_cast<u32>(target));
331 }
332}
333
334} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
new file mode 100644
index 000000000..358d6757c
--- /dev/null
+++ b/src/video_core/texture_cache/surface_params.h
@@ -0,0 +1,286 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <map>
8
9#include "common/alignment.h"
10#include "common/bit_util.h"
11#include "common/cityhash.h"
12#include "common/common_types.h"
13#include "video_core/engines/fermi_2d.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/shader/shader_ir.h"
16#include "video_core/surface.h"
17#include "video_core/textures/decoders.h"
18
19namespace VideoCommon {
20
21using VideoCore::Surface::SurfaceCompression;
22
23class SurfaceParams {
24public:
25 /// Creates SurfaceCachedParams from a texture configuration.
26 static SurfaceParams CreateForTexture(Core::System& system,
27 const Tegra::Texture::FullTextureInfo& config,
28 const VideoCommon::Shader::Sampler& entry);
29
30 /// Creates SurfaceCachedParams for a depth buffer configuration.
31 static SurfaceParams CreateForDepthBuffer(
32 Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
33 u32 block_width, u32 block_height, u32 block_depth,
34 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
35
36 /// Creates SurfaceCachedParams from a framebuffer configuration.
37 static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);
38
39 /// Creates SurfaceCachedParams from a Fermi2D surface configuration.
40 static SurfaceParams CreateForFermiCopySurface(
41 const Tegra::Engines::Fermi2D::Regs::Surface& config);
42
43 std::size_t Hash() const {
44 return static_cast<std::size_t>(
45 Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
46 }
47
48 bool operator==(const SurfaceParams& rhs) const;
49
50 bool operator!=(const SurfaceParams& rhs) const {
51 return !operator==(rhs);
52 }
53
54 std::size_t GetGuestSizeInBytes() const {
55 return GetInnerMemorySize(false, false, false);
56 }
57
58 std::size_t GetHostSizeInBytes() const {
59 std::size_t host_size_in_bytes;
60 if (GetCompressionType() == SurfaceCompression::Converted) {
61 constexpr std::size_t rgb8_bpp = 4ULL;
62 // ASTC is uncompressed in software, and emulated as RGBA8
63 host_size_in_bytes = 0;
64 for (u32 level = 0; level < num_levels; ++level) {
65 host_size_in_bytes += GetConvertedMipmapSize(level);
66 }
67 } else {
68 host_size_in_bytes = GetInnerMemorySize(true, false, false);
69 }
70 return host_size_in_bytes;
71 }
72
73 u32 GetBlockAlignedWidth() const {
74 return Common::AlignUp(width, 64 / GetBytesPerPixel());
75 }
76
77 /// Returns the width of a given mipmap level.
78 u32 GetMipWidth(u32 level) const {
79 return std::max(1U, width >> level);
80 }
81
82 /// Returns the height of a given mipmap level.
83 u32 GetMipHeight(u32 level) const {
84 return std::max(1U, height >> level);
85 }
86
87 /// Returns the depth of a given mipmap level.
88 u32 GetMipDepth(u32 level) const {
89 return is_layered ? depth : std::max(1U, depth >> level);
90 }
91
92 /// Returns the block height of a given mipmap level.
93 u32 GetMipBlockHeight(u32 level) const;
94
95 /// Returns the block depth of a given mipmap level.
96 u32 GetMipBlockDepth(u32 level) const;
97
98 /// Returns the best possible row/pitch alignment for the surface.
99 u32 GetRowAlignment(u32 level) const {
100 const u32 bpp =
101 GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel();
102 return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp);
103 }
104
105 /// Returns the offset in bytes in guest memory of a given mipmap level.
106 std::size_t GetGuestMipmapLevelOffset(u32 level) const;
107
108 /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
109 std::size_t GetHostMipmapLevelOffset(u32 level) const;
110
111 /// Returns the offset in bytes in host memory (linear) of a given mipmap level
112 /// for a texture that is converted on the host GPU.
113 std::size_t GetConvertedMipmapOffset(u32 level) const;
114
115 /// Returns the size in bytes in guest memory of a given mipmap level.
116 std::size_t GetGuestMipmapSize(u32 level) const {
117 return GetInnerMipmapMemorySize(level, false, false);
118 }
119
120 /// Returns the size in bytes in host memory (linear) of a given mipmap level.
121 std::size_t GetHostMipmapSize(u32 level) const {
122 return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers();
123 }
124
125 std::size_t GetConvertedMipmapSize(u32 level) const;
126
127 /// Returns the size of a layer in bytes in guest memory.
128 std::size_t GetGuestLayerSize() const {
129 return GetLayerSize(false, false);
130 }
131
132 /// Returns the size of a layer in bytes in host memory for a given mipmap level.
133 std::size_t GetHostLayerSize(u32 level) const {
134 ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D);
135 return GetInnerMipmapMemorySize(level, true, false);
136 }
137
138 /// Returns the maximum possible mipmap count that the texture can have on the host GPU
139 u32 MaxPossibleMipmap() const {
140 const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U;
141 const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U;
142 const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h);
143 if (target != VideoCore::Surface::SurfaceTarget::Texture3D)
144 return max_mipmap;
145 return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U);
146 }
147
148 /// Returns if the guest surface is a compressed surface.
149 bool IsCompressed() const {
150 return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1;
151 }
152
153 /// Returns the default block width.
154 u32 GetDefaultBlockWidth() const {
155 return VideoCore::Surface::GetDefaultBlockWidth(pixel_format);
156 }
157
158 /// Returns the default block height.
159 u32 GetDefaultBlockHeight() const {
160 return VideoCore::Surface::GetDefaultBlockHeight(pixel_format);
161 }
162
163 /// Returns the bits per pixel.
164 u32 GetBitsPerPixel() const {
165 return VideoCore::Surface::GetFormatBpp(pixel_format);
166 }
167
168 /// Returns the bytes per pixel.
169 u32 GetBytesPerPixel() const {
170 return VideoCore::Surface::GetBytesPerPixel(pixel_format);
171 }
172
173 /// Returns true if the pixel format is a depth and/or stencil format.
174 bool IsPixelFormatZeta() const {
175 return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat &&
176 pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
177 }
178
179 /// Returns how the compression should be handled for this texture.
180 SurfaceCompression GetCompressionType() const {
181 return VideoCore::Surface::GetFormatCompressionType(pixel_format);
182 }
183
184 /// Returns true if the surface is a TextureBuffer type of surface.
185 bool IsBuffer() const {
186 return target == VideoCore::Surface::SurfaceTarget::TextureBuffer;
187 }
188
189 /// Returns the debug name of the texture for use in graphic debuggers.
190 std::string TargetName() const;
191
192 // Helper used for out of class size calculations
193 static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height,
194 const u32 block_depth) {
195 return Common::AlignBits(out_size,
196 Tegra::Texture::GetGOBSizeShift() + block_height + block_depth);
197 }
198
199 /// Converts a width from a type of surface into another. This helps represent the
200 /// equivalent value between compressed/non-compressed textures.
201 static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from,
202 VideoCore::Surface::PixelFormat pixel_format_to) {
203 const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from);
204 const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to);
205 return (width * bw2 + bw1 - 1) / bw1;
206 }
207
208 /// Converts a height from a type of surface into another. This helps represent the
209 /// equivalent value between compressed/non-compressed textures.
210 static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from,
211 VideoCore::Surface::PixelFormat pixel_format_to) {
212 const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from);
213 const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to);
214 return (height * bh2 + bh1 - 1) / bh1;
215 }
216
217 // Finds the maximum possible width between 2 2D layers of different formats
218 static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params,
219 const u32 src_level, const u32 dst_level) {
220 const u32 bw1 = src_params.GetDefaultBlockWidth();
221 const u32 bw2 = dst_params.GetDefaultBlockWidth();
222 const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1;
223 const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2;
224 return std::min(t_src_width, t_dst_width);
225 }
226
227 // Finds the maximum possible height between 2 2D layers of different formats
228 static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params,
229 const u32 src_level, const u32 dst_level) {
230 const u32 bh1 = src_params.GetDefaultBlockHeight();
231 const u32 bh2 = dst_params.GetDefaultBlockHeight();
232 const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1;
233 const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2;
234 return std::min(t_src_height, t_dst_height);
235 }
236
237 bool is_tiled;
238 bool srgb_conversion;
239 bool is_layered;
240 u32 block_width;
241 u32 block_height;
242 u32 block_depth;
243 u32 tile_width_spacing;
244 u32 width;
245 u32 height;
246 u32 depth;
247 u32 pitch;
248 u32 num_levels;
249 u32 emulated_levels;
250 VideoCore::Surface::PixelFormat pixel_format;
251 VideoCore::Surface::ComponentType component_type;
252 VideoCore::Surface::SurfaceType type;
253 VideoCore::Surface::SurfaceTarget target;
254
255private:
256 /// Returns the size of a given mipmap level inside a layer.
257 std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const;
258
259 /// Returns the size of all mipmap levels and aligns as needed.
260 std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const {
261 return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 1U : depth);
262 }
263
264 /// Returns the size of a layer
265 std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const;
266
267 std::size_t GetNumLayers() const {
268 return is_layered ? depth : 1;
269 }
270
271 /// Returns true if these parameters are from a layered surface.
272 bool IsLayered() const;
273};
274
275} // namespace VideoCommon
276
277namespace std {
278
279template <>
280struct hash<VideoCommon::SurfaceParams> {
281 std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
282 return k.Hash();
283 }
284};
285
286} // namespace std
diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp
new file mode 100644
index 000000000..467696a4c
--- /dev/null
+++ b/src/video_core/texture_cache/surface_view.cpp
@@ -0,0 +1,23 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <tuple>
6
7#include "common/common_types.h"
8#include "video_core/texture_cache/surface_view.h"
9
10namespace VideoCommon {
11
12std::size_t ViewParams::Hash() const {
13 return static_cast<std::size_t>(base_layer) ^ static_cast<std::size_t>(num_layers << 16) ^
14 (static_cast<std::size_t>(base_level) << 24) ^
15 (static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36);
16}
17
18bool ViewParams::operator==(const ViewParams& rhs) const {
19 return std::tie(base_layer, num_layers, base_level, num_levels, target) ==
20 std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target);
21}
22
23} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h
new file mode 100644
index 000000000..04ca5639b
--- /dev/null
+++ b/src/video_core/texture_cache/surface_view.h
@@ -0,0 +1,67 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <functional>
8
9#include "common/common_types.h"
10#include "video_core/surface.h"
11#include "video_core/texture_cache/surface_params.h"
12
13namespace VideoCommon {
14
15struct ViewParams {
16 ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer, u32 num_layers,
17 u32 base_level, u32 num_levels)
18 : target{target}, base_layer{base_layer}, num_layers{num_layers}, base_level{base_level},
19 num_levels{num_levels} {}
20
21 std::size_t Hash() const;
22
23 bool operator==(const ViewParams& rhs) const;
24
25 VideoCore::Surface::SurfaceTarget target{};
26 u32 base_layer{};
27 u32 num_layers{};
28 u32 base_level{};
29 u32 num_levels{};
30
31 bool IsLayered() const {
32 switch (target) {
33 case VideoCore::Surface::SurfaceTarget::Texture1DArray:
34 case VideoCore::Surface::SurfaceTarget::Texture2DArray:
35 case VideoCore::Surface::SurfaceTarget::TextureCubemap:
36 case VideoCore::Surface::SurfaceTarget::TextureCubeArray:
37 return true;
38 default:
39 return false;
40 }
41 }
42};
43
44class ViewBase {
45public:
46 ViewBase(const ViewParams& params) : params{params} {}
47
48 const ViewParams& GetViewParams() const {
49 return params;
50 }
51
52protected:
53 ViewParams params;
54};
55
56} // namespace VideoCommon
57
58namespace std {
59
60template <>
61struct hash<VideoCommon::ViewParams> {
62 std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept {
63 return k.Hash();
64 }
65};
66
67} // namespace std
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
new file mode 100644
index 000000000..c9e72531a
--- /dev/null
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -0,0 +1,814 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <array>
9#include <memory>
10#include <mutex>
11#include <set>
12#include <tuple>
13#include <unordered_map>
14#include <vector>
15
16#include <boost/icl/interval_map.hpp>
17#include <boost/range/iterator_range.hpp>
18
19#include "common/assert.h"
20#include "common/common_types.h"
21#include "common/math_util.h"
22#include "core/core.h"
23#include "core/memory.h"
24#include "core/settings.h"
25#include "video_core/engines/fermi_2d.h"
26#include "video_core/engines/maxwell_3d.h"
27#include "video_core/gpu.h"
28#include "video_core/memory_manager.h"
29#include "video_core/rasterizer_interface.h"
30#include "video_core/surface.h"
31#include "video_core/texture_cache/copy_params.h"
32#include "video_core/texture_cache/surface_base.h"
33#include "video_core/texture_cache/surface_params.h"
34#include "video_core/texture_cache/surface_view.h"
35
36namespace Tegra::Texture {
37struct FullTextureInfo;
38}
39
40namespace VideoCore {
41class RasterizerInterface;
42}
43
44namespace VideoCommon {
45
46using VideoCore::Surface::PixelFormat;
47
48using VideoCore::Surface::SurfaceTarget;
49using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
50
51template <typename TSurface, typename TView>
52class TextureCache {
53 using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>;
54 using IntervalType = typename IntervalMap::interval_type;
55
56public:
57 void InvalidateRegion(CacheAddr addr, std::size_t size) {
58 std::lock_guard lock{mutex};
59
60 for (const auto& surface : GetSurfacesInRegion(addr, size)) {
61 Unregister(surface);
62 }
63 }
64
65 /***
66 * `Guard` guarantees that render targets don't unregister themselves if they
67 * collide. Protection is currently only done on 3D slices.
68 ***/
69 void GuardRenderTargets(bool new_guard) {
70 guard_render_targets = new_guard;
71 }
72
73 void GuardSamplers(bool new_guard) {
74 guard_samplers = new_guard;
75 }
76
77 void FlushRegion(CacheAddr addr, std::size_t size) {
78 std::lock_guard lock{mutex};
79
80 auto surfaces = GetSurfacesInRegion(addr, size);
81 if (surfaces.empty()) {
82 return;
83 }
84 std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) {
85 return a->GetModificationTick() < b->GetModificationTick();
86 });
87 for (const auto& surface : surfaces) {
88 FlushSurface(surface);
89 }
90 }
91
92 TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
93 const VideoCommon::Shader::Sampler& entry) {
94 std::lock_guard lock{mutex};
95 const auto gpu_addr{config.tic.Address()};
96 if (!gpu_addr) {
97 return {};
98 }
99 const auto params{SurfaceParams::CreateForTexture(system, config, entry)};
100 const auto [surface, view] = GetSurface(gpu_addr, params, true, false);
101 if (guard_samplers) {
102 sampled_textures.push_back(surface);
103 }
104 return view;
105 }
106
107 bool TextureBarrier() {
108 const bool any_rt =
109 std::any_of(sampled_textures.begin(), sampled_textures.end(),
110 [](const auto& surface) { return surface->IsRenderTarget(); });
111 sampled_textures.clear();
112 return any_rt;
113 }
114
115 TView GetDepthBufferSurface(bool preserve_contents) {
116 std::lock_guard lock{mutex};
117 auto& maxwell3d = system.GPU().Maxwell3D();
118
119 if (!maxwell3d.dirty_flags.zeta_buffer) {
120 return depth_buffer.view;
121 }
122 maxwell3d.dirty_flags.zeta_buffer = false;
123
124 const auto& regs{maxwell3d.regs};
125 const auto gpu_addr{regs.zeta.Address()};
126 if (!gpu_addr || !regs.zeta_enable) {
127 SetEmptyDepthBuffer();
128 return {};
129 }
130 const auto depth_params{SurfaceParams::CreateForDepthBuffer(
131 system, regs.zeta_width, regs.zeta_height, regs.zeta.format,
132 regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
133 regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
134 auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true);
135 if (depth_buffer.target)
136 depth_buffer.target->MarkAsRenderTarget(false);
137 depth_buffer.target = surface_view.first;
138 depth_buffer.view = surface_view.second;
139 if (depth_buffer.target)
140 depth_buffer.target->MarkAsRenderTarget(true);
141 return surface_view.second;
142 }
143
144 TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
145 std::lock_guard lock{mutex};
146 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
147 auto& maxwell3d = system.GPU().Maxwell3D();
148 if (!maxwell3d.dirty_flags.color_buffer[index]) {
149 return render_targets[index].view;
150 }
151 maxwell3d.dirty_flags.color_buffer.reset(index);
152
153 const auto& regs{maxwell3d.regs};
154 if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
155 regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
156 SetEmptyColorBuffer(index);
157 return {};
158 }
159
160 const auto& config{regs.rt[index]};
161 const auto gpu_addr{config.Address()};
162 if (!gpu_addr) {
163 SetEmptyColorBuffer(index);
164 return {};
165 }
166
167 auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
168 preserve_contents, true);
169 if (render_targets[index].target)
170 render_targets[index].target->MarkAsRenderTarget(false);
171 render_targets[index].target = surface_view.first;
172 render_targets[index].view = surface_view.second;
173 if (render_targets[index].target)
174 render_targets[index].target->MarkAsRenderTarget(true);
175 return surface_view.second;
176 }
177
178 void MarkColorBufferInUse(std::size_t index) {
179 if (auto& render_target = render_targets[index].target) {
180 render_target->MarkAsModified(true, Tick());
181 }
182 }
183
184 void MarkDepthBufferInUse() {
185 if (depth_buffer.target) {
186 depth_buffer.target->MarkAsModified(true, Tick());
187 }
188 }
189
190 void SetEmptyDepthBuffer() {
191 if (depth_buffer.target == nullptr) {
192 return;
193 }
194 depth_buffer.target->MarkAsRenderTarget(false);
195 depth_buffer.target = nullptr;
196 depth_buffer.view = nullptr;
197 }
198
199 void SetEmptyColorBuffer(std::size_t index) {
200 if (render_targets[index].target == nullptr) {
201 return;
202 }
203 render_targets[index].target->MarkAsRenderTarget(false);
204 render_targets[index].target = nullptr;
205 render_targets[index].view = nullptr;
206 }
207
208 void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
209 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
210 const Tegra::Engines::Fermi2D::Config& copy_config) {
211 std::lock_guard lock{mutex};
212 std::pair<TSurface, TView> dst_surface = GetFermiSurface(dst_config);
213 std::pair<TSurface, TView> src_surface = GetFermiSurface(src_config);
214 ImageBlit(src_surface.second, dst_surface.second, copy_config);
215 dst_surface.first->MarkAsModified(true, Tick());
216 }
217
218 TSurface TryFindFramebufferSurface(const u8* host_ptr) {
219 const CacheAddr cache_addr = ToCacheAddr(host_ptr);
220 if (!cache_addr) {
221 return nullptr;
222 }
223 const CacheAddr page = cache_addr >> registry_page_bits;
224 std::vector<TSurface>& list = registry[page];
225 for (auto& surface : list) {
226 if (surface->GetCacheAddr() == cache_addr) {
227 return surface;
228 }
229 }
230 return nullptr;
231 }
232
233 u64 Tick() {
234 return ++ticks;
235 }
236
237protected:
238 TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
239 : system{system}, rasterizer{rasterizer} {
240 for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
241 SetEmptyColorBuffer(i);
242 }
243
244 SetEmptyDepthBuffer();
245 staging_cache.SetSize(2);
246
247 const auto make_siblings = [this](PixelFormat a, PixelFormat b) {
248 siblings_table[static_cast<std::size_t>(a)] = b;
249 siblings_table[static_cast<std::size_t>(b)] = a;
250 };
251 std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid);
252 make_siblings(PixelFormat::Z16, PixelFormat::R16U);
253 make_siblings(PixelFormat::Z32F, PixelFormat::R32F);
254 make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F);
255
256 sampled_textures.reserve(64);
257 }
258
259 ~TextureCache() = default;
260
261 virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0;
262
263 virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface,
264 const CopyParams& copy_params) = 0;
265
266 virtual void ImageBlit(TView& src_view, TView& dst_view,
267 const Tegra::Engines::Fermi2D::Config& copy_config) = 0;
268
269 // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture
270 // and reading it from a separate buffer.
271 virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
272
273 void Register(TSurface surface) {
274 const GPUVAddr gpu_addr = surface->GetGpuAddr();
275 const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
276 const std::size_t size = surface->GetSizeInBytes();
277 const std::optional<VAddr> cpu_addr =
278 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
279 if (!cache_ptr || !cpu_addr) {
280 LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
281 gpu_addr);
282 return;
283 }
284 const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size);
285 surface->MarkAsContinuous(continuous);
286 surface->SetCacheAddr(cache_ptr);
287 surface->SetCpuAddr(*cpu_addr);
288 RegisterInnerCache(surface);
289 surface->MarkAsRegistered(true);
290 rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
291 }
292
293 void Unregister(TSurface surface) {
294 if (guard_render_targets && surface->IsProtected()) {
295 return;
296 }
297 const GPUVAddr gpu_addr = surface->GetGpuAddr();
298 const CacheAddr cache_ptr = surface->GetCacheAddr();
299 const std::size_t size = surface->GetSizeInBytes();
300 const VAddr cpu_addr = surface->GetCpuAddr();
301 rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
302 UnregisterInnerCache(surface);
303 surface->MarkAsRegistered(false);
304 ReserveSurface(surface->GetSurfaceParams(), surface);
305 }
306
307 TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
308 if (const auto surface = TryGetReservedSurface(params); surface) {
309 surface->SetGpuAddr(gpu_addr);
310 return surface;
311 }
312 // No reserved surface available, create a new one and reserve it
313 auto new_surface{CreateSurface(gpu_addr, params)};
314 return new_surface;
315 }
316
317 std::pair<TSurface, TView> GetFermiSurface(
318 const Tegra::Engines::Fermi2D::Regs::Surface& config) {
319 SurfaceParams params = SurfaceParams::CreateForFermiCopySurface(config);
320 const GPUVAddr gpu_addr = config.Address();
321 return GetSurface(gpu_addr, params, true, false);
322 }
323
324 Core::System& system;
325
326private:
327 enum class RecycleStrategy : u32 {
328 Ignore = 0,
329 Flush = 1,
330 BufferCopy = 3,
331 };
332
333 /**
334 * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle.
335 * @param overlaps, the overlapping surfaces registered in the cache.
336 * @param params, the parameters of the new surface.
337 * @param gpu_addr, the starting address of the new surface.
338 * @param untopological, tells the recycler that the texture has no way to match the overlaps
339 * due to topological reasons.
340 **/
341 RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
342 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
343 if (Settings::values.use_accurate_gpu_emulation) {
344 return RecycleStrategy::Flush;
345 }
346 // 3D Textures decision
347 if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) {
348 return RecycleStrategy::Flush;
349 }
350 for (auto s : overlaps) {
351 const auto& s_params = s->GetSurfaceParams();
352 if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) {
353 return RecycleStrategy::Flush;
354 }
355 }
356 // Untopological decision
357 if (untopological == MatchTopologyResult::CompressUnmatch) {
358 return RecycleStrategy::Flush;
359 }
360 if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) {
361 return RecycleStrategy::Flush;
362 }
363 return RecycleStrategy::Ignore;
364 }
365
366 /**
367 * `RecycleSurface` is a method we use to decide what to do with textures we can't resolve in
368 * the cache. It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters all the
369 * overlaps and loads the new texture. Flush flushes all the overlaps into memory and loads the
370 * new surface from that data.
371 * @param overlaps, the overlapping surfaces registered in the cache.
372 * @param params, the parameters of the new surface.
373 * @param gpu_addr, the starting address of the new surface.
374 * @param preserve_contents, tells if the new surface should be loaded from memory or left blank
375 * @param untopological, tells the recycler that the texture has no way to match the overlaps
376 * due to topological reasons.
377 **/
378 std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
379 const SurfaceParams& params, const GPUVAddr gpu_addr,
380 const bool preserve_contents,
381 const MatchTopologyResult untopological) {
382 const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation;
383 for (auto& surface : overlaps) {
384 Unregister(surface);
385 }
386 switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
387 case RecycleStrategy::Ignore: {
388 return InitializeSurface(gpu_addr, params, do_load);
389 }
390 case RecycleStrategy::Flush: {
391 std::sort(overlaps.begin(), overlaps.end(),
392 [](const TSurface& a, const TSurface& b) -> bool {
393 return a->GetModificationTick() < b->GetModificationTick();
394 });
395 for (auto& surface : overlaps) {
396 FlushSurface(surface);
397 }
398 return InitializeSurface(gpu_addr, params, preserve_contents);
399 }
400 case RecycleStrategy::BufferCopy: {
401 auto new_surface = GetUncachedSurface(gpu_addr, params);
402 BufferCopy(overlaps[0], new_surface);
403 return {new_surface, new_surface->GetMainView()};
404 }
405 default: {
406 UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
407 return InitializeSurface(gpu_addr, params, do_load);
408 }
409 }
410 }
411
    /**
     * `RebuildSurface` this method takes a single surface and recreates it into another that
     * may differ in format, target or width alignment.
     * @param current_surface, the registered surface in the cache which we want to convert.
     * @param params, the new surface params which we'll use to recreate the surface.
     * @param is_render, whether the surface is used as a render target (disables reusing the
     * old sibling format).
     **/
    std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params,
                                              bool is_render) {
        const auto gpu_addr = current_surface->GetGpuAddr();
        const auto& cr_params = current_surface->GetSurfaceParams();
        TSurface new_surface;
        // If the formats only differ by a compatible sibling (and we are not rendering),
        // keep the current format/type so the data can be copied without conversion.
        if (cr_params.pixel_format != params.pixel_format && !is_render &&
            GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) {
            SurfaceParams new_params = params;
            new_params.pixel_format = cr_params.pixel_format;
            new_params.component_type = cr_params.component_type;
            new_params.type = cr_params.type;
            new_surface = GetUncachedSurface(gpu_addr, new_params);
        } else {
            new_surface = GetUncachedSurface(gpu_addr, params);
        }
        const auto& final_params = new_surface->GetSurfaceParams();
        if (cr_params.type != final_params.type ||
            (cr_params.component_type != final_params.component_type)) {
            // Incompatible surface/component types: move raw data through a buffer copy.
            BufferCopy(current_surface, new_surface);
        } else {
            // Compatible types: copy each subresource "brick" image-to-image.
            std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
            for (auto& brick : bricks) {
                ImageCopy(current_surface, new_surface, brick);
            }
        }
        Unregister(current_surface);
        Register(new_surface);
        // Preserve the modification state so pending flushes are not lost.
        new_surface->MarkAsModified(current_surface->IsModified(), Tick());
        return {new_surface, new_surface->GetMainView()};
    }
448
449 /**
450 * `ManageStructuralMatch` this method takes a single surface and checks with the new surface's
451 * params if it's an exact match, we return the main view of the registered surface. If it's
452 * formats don't match, we rebuild the surface. We call this last method a `Mirage`. If formats
453 * match but the targets don't, we create an overview View of the registered surface.
454 * @param current_surface, the registered surface in the cache which we want to convert.
455 * @param params, the new surface params which we want to check.
456 **/
457 std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface,
458 const SurfaceParams& params, bool is_render) {
459 const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
460 const bool matches_target = current_surface->MatchTarget(params.target);
461 const auto match_check = [&]() -> std::pair<TSurface, TView> {
462 if (matches_target) {
463 return {current_surface, current_surface->GetMainView()};
464 }
465 return {current_surface, current_surface->EmplaceOverview(params)};
466 };
467 if (!is_mirage) {
468 return match_check();
469 }
470 if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) {
471 return match_check();
472 }
473 return RebuildSurface(current_surface, params, is_render);
474 }
475
476 /**
477 * `TryReconstructSurface` unlike `RebuildSurface` where we know the registered surface
478 * matches the candidate in some way, we got no guarantess here. We try to see if the overlaps
479 * are sublayers/mipmaps of the new surface, if they all match we end up recreating a surface
480 * for them, else we return nothing.
481 * @param overlaps, the overlapping surfaces registered in the cache.
482 * @param params, the paremeters on the new surface.
483 * @param gpu_addr, the starting address of the new surface.
484 **/
485 std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps,
486 const SurfaceParams& params,
487 const GPUVAddr gpu_addr) {
488 if (params.target == SurfaceTarget::Texture3D) {
489 return {};
490 }
491 bool modified = false;
492 TSurface new_surface = GetUncachedSurface(gpu_addr, params);
493 u32 passed_tests = 0;
494 for (auto& surface : overlaps) {
495 const SurfaceParams& src_params = surface->GetSurfaceParams();
496 if (src_params.is_layered || src_params.num_levels > 1) {
497 // We send this cases to recycle as they are more complex to handle
498 return {};
499 }
500 const std::size_t candidate_size = surface->GetSizeInBytes();
501 auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
502 if (!mipmap_layer) {
503 continue;
504 }
505 const auto [layer, mipmap] = *mipmap_layer;
506 if (new_surface->GetMipmapSize(mipmap) != candidate_size) {
507 continue;
508 }
509 modified |= surface->IsModified();
510 // Now we got all the data set up
511 const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
512 const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
513 const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1);
514 passed_tests++;
515 ImageCopy(surface, new_surface, copy_params);
516 }
517 if (passed_tests == 0) {
518 return {};
519 // In Accurate GPU all tests should pass, else we recycle
520 } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) {
521 return {};
522 }
523 for (auto surface : overlaps) {
524 Unregister(surface);
525 }
526 new_surface->MarkAsModified(modified, Tick());
527 Register(new_surface);
528 return {{new_surface, new_surface->GetMainView()}};
529 }
530
    /**
     * `GetSurface` gets the starting address and parameters of a candidate surface and tries
     * to find a matching surface within the cache. This is done in 3 big steps. The first is to
     * check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2.
     * Step 2 is checking if there are any overlaps at all, if none, we just load the texture from
     * memory else we move to step 3. Step 3 consists on figuring the relationship between the
     * candidate texture and the overlaps. We divide the scenarios depending if there's 1 or many
     * overlaps. If there's many, we just try to reconstruct a new surface out of them based on the
     * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we have to
     * check if the candidate is a view (layer/mipmap) of the overlap or if the registered surface
     * is a mipmap/layer of the candidate. In this last case we reconstruct a new surface.
     * @param gpu_addr, the starting address of the candidate surface.
     * @param params, the parameters of the candidate surface.
     * @param preserve_contents, tells if the new surface should be loaded from memory or left
     * blank.
     * @param is_render, whether the surface is requested for rendering.
     **/
    std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
                                          bool preserve_contents, bool is_render) {
        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
        const auto cache_addr{ToCacheAddr(host_ptr)};

        // Step 0: guarantee a valid surface
        if (!cache_addr) {
            // Return a 1x1x1 null surface if the address doesn't map to host memory
            SurfaceParams new_params = params;
            new_params.width = 1;
            new_params.height = 1;
            new_params.depth = 1;
            new_params.block_height = 0;
            new_params.block_depth = 0;
            return InitializeSurface(gpu_addr, new_params, false);
        }

        // Step 1
        // Check Level 1 Cache for a fast structural match. If candidate surface
        // matches at certain level we are pretty much done.
        if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
            TSurface& current_surface = iter->second;
            const auto topological_result = current_surface->MatchesTopology(params);
            if (topological_result != MatchTopologyResult::FullMatch) {
                std::vector<TSurface> overlaps{current_surface};
                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                      topological_result);
            }
            const auto struct_result = current_surface->MatchesStructure(params);
            if (struct_result != MatchStructureResult::None &&
                (params.target != SurfaceTarget::Texture3D ||
                 current_surface->MatchTarget(params.target))) {
                if (struct_result == MatchStructureResult::FullMatch) {
                    return ManageStructuralMatch(current_surface, params, is_render);
                } else {
                    return RebuildSurface(current_surface, params, is_render);
                }
            }
        }

        // Step 2
        // Obtain all possible overlaps in the memory region
        const std::size_t candidate_size = params.GetGuestSizeInBytes();
        auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};

        // If none are found, we are done: just create the surface and load it.
        if (overlaps.empty()) {
            return InitializeSurface(gpu_addr, params, preserve_contents);
        }

        // Step 3
        // Now we need to figure the relationship between the texture and its overlaps.
        // We do a topological test to ensure we can find some relationship. If it fails,
        // immediately recycle the texture.
        for (const auto& surface : overlaps) {
            const auto topological_result = surface->MatchesTopology(params);
            if (topological_result != MatchTopologyResult::FullMatch) {
                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                      topological_result);
            }
        }

        // Split cases between 1 overlap or many.
        if (overlaps.size() == 1) {
            TSurface current_surface = overlaps[0];
            // First check if the surface is within the overlap. If not, it means
            // two things: either the candidate surface is a supertexture of the overlap
            // or they don't match in any known way.
            if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
                if (current_surface->GetGpuAddr() == gpu_addr) {
                    std::optional<std::pair<TSurface, TView>> view =
                        TryReconstructSurface(overlaps, params, gpu_addr);
                    if (view) {
                        return *view;
                    }
                }
                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                      MatchTopologyResult::FullMatch);
            }
            // Now we check if the candidate is a mipmap/layer of the overlap
            std::optional<TView> view =
                current_surface->EmplaceView(params, gpu_addr, candidate_size);
            if (view) {
                const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
                if (is_mirage) {
                    // On a mirage view, we need to recreate the surface under this new view
                    // and then obtain a view again.
                    SurfaceParams new_params = current_surface->GetSurfaceParams();
                    const u32 wh = SurfaceParams::ConvertWidth(
                        new_params.width, new_params.pixel_format, params.pixel_format);
                    const u32 hh = SurfaceParams::ConvertHeight(
                        new_params.height, new_params.pixel_format, params.pixel_format);
                    new_params.width = wh;
                    new_params.height = hh;
                    new_params.pixel_format = params.pixel_format;
                    std::pair<TSurface, TView> pair =
                        RebuildSurface(current_surface, new_params, is_render);
                    std::optional<TView> mirage_view =
                        pair.first->EmplaceView(params, gpu_addr, candidate_size);
                    if (mirage_view)
                        return {pair.first, *mirage_view};
                    return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                          MatchTopologyResult::FullMatch);
                }
                return {current_surface, *view};
            }
            // The next case is unsafe, so if we are in accurate GPU emulation, just skip it
            if (Settings::values.use_accurate_gpu_emulation) {
                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                      MatchTopologyResult::FullMatch);
            }
            // This is the case where the texture is a part of the parent.
            if (current_surface->MatchesSubTexture(params, gpu_addr)) {
                return RebuildSurface(current_surface, params, is_render);
            }
        } else {
            // If there are many overlaps, odds are they are subtextures of the candidate
            // surface. We try to construct a new surface based on the candidate parameters,
            // using the overlaps. If a single overlap fails, this will fail.
            std::optional<std::pair<TSurface, TView>> view =
                TryReconstructSurface(overlaps, params, gpu_addr);
            if (view) {
                return *view;
            }
        }
        // We failed all the tests, recycle the overlaps into a new texture.
        return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                              MatchTopologyResult::FullMatch);
    }
675
676 std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
677 bool preserve_contents) {
678 auto new_surface{GetUncachedSurface(gpu_addr, params)};
679 Register(new_surface);
680 if (preserve_contents) {
681 LoadSurface(new_surface);
682 }
683 return {new_surface, new_surface->GetMainView()};
684 }
685
686 void LoadSurface(const TSurface& surface) {
687 staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
688 surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache);
689 surface->UploadTexture(staging_cache.GetBuffer(0));
690 surface->MarkAsModified(false, Tick());
691 }
692
693 void FlushSurface(const TSurface& surface) {
694 if (!surface->IsModified()) {
695 return;
696 }
697 staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
698 surface->DownloadTexture(staging_cache.GetBuffer(0));
699 surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache);
700 surface->MarkAsModified(false, Tick());
701 }
702
703 void RegisterInnerCache(TSurface& surface) {
704 const CacheAddr cache_addr = surface->GetCacheAddr();
705 CacheAddr start = cache_addr >> registry_page_bits;
706 const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
707 l1_cache[cache_addr] = surface;
708 while (start <= end) {
709 registry[start].push_back(surface);
710 start++;
711 }
712 }
713
714 void UnregisterInnerCache(TSurface& surface) {
715 const CacheAddr cache_addr = surface->GetCacheAddr();
716 CacheAddr start = cache_addr >> registry_page_bits;
717 const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
718 l1_cache.erase(cache_addr);
719 while (start <= end) {
720 auto& reg{registry[start]};
721 reg.erase(std::find(reg.begin(), reg.end(), surface));
722 start++;
723 }
724 }
725
726 std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) {
727 if (size == 0) {
728 return {};
729 }
730 const CacheAddr cache_addr_end = cache_addr + size;
731 CacheAddr start = cache_addr >> registry_page_bits;
732 const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits;
733 std::vector<TSurface> surfaces;
734 while (start <= end) {
735 std::vector<TSurface>& list = registry[start];
736 for (auto& surface : list) {
737 if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) {
738 surface->MarkAsPicked(true);
739 surfaces.push_back(surface);
740 }
741 }
742 start++;
743 }
744 for (auto& surface : surfaces) {
745 surface->MarkAsPicked(false);
746 }
747 return surfaces;
748 }
749
750 void ReserveSurface(const SurfaceParams& params, TSurface surface) {
751 surface_reserve[params].push_back(std::move(surface));
752 }
753
754 TSurface TryGetReservedSurface(const SurfaceParams& params) {
755 auto search{surface_reserve.find(params)};
756 if (search == surface_reserve.end()) {
757 return {};
758 }
759 for (auto& surface : search->second) {
760 if (!surface->IsRegistered()) {
761 return surface;
762 }
763 }
764 return {};
765 }
766
    /// Looks up the interchangeable "sibling" pixel format for `format` in siblings_table.
    /// Only meaningful for non-rendering conflicts (see the siblings_table comment below).
    constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {
        return siblings_table[static_cast<std::size_t>(format)];
    }
770
    /// Bundles a framebuffer attachment surface with the view bound to it.
    struct FramebufferTargetInfo {
        TSurface target;
        TView view;
    };

    VideoCore::RasterizerInterface& rasterizer;

    // Monotonic counter used to order surface modifications
    // (presumably advanced by Tick() — confirm against the full class).
    u64 ticks{};

    // Guards the cache for protection conflicts.
    bool guard_render_targets{};
    bool guard_samplers{};

    // The siblings table is for formats that can interchange with one another
    // without causing issues. This is only valid when a conflict occurs on a non
    // rendering use.
    std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table;

    // The internal Cache is different for the Texture Cache. It's based on buckets
    // of 1MB. This fits better for the purpose of this cache as textures are normally
    // large in size.
    static constexpr u64 registry_page_bits{20};
    static constexpr u64 registry_page_size{1 << registry_page_bits};
    std::unordered_map<CacheAddr, std::vector<TSurface>> registry;

    // The L1 Cache is used for fast texture lookup before checking the overlaps.
    // This avoids calculating size and other work.
    std::unordered_map<CacheAddr, TSurface> l1_cache;

    /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
    /// previously been used. This is to prevent surfaces from being constantly created and
    /// destroyed when used with different surface parameters.
    std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve;

    // Currently bound color render targets and depth buffer.
    std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
        render_targets;
    FramebufferTargetInfo depth_buffer;

    // Surfaces bound as samplers (see guard_samplers above).
    std::vector<TSurface> sampled_textures;

    // Scratch buffers for guest<->host texture transfers.
    StagingCache staging_cache;
    // Recursive: cache methods may re-enter through callbacks while holding the lock.
    std::recursive_mutex mutex;
812};
813
814} // namespace VideoCommon