summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Fernando Sahmkow, 2019-06-13 10:26:17 -0400
committer: ReinUsesLisp, 2019-06-20 21:38:34 -0300
commit: 4db28f72f617b1500581e621719928fa0807d9ac (patch)
tree: c282bb335ed9f714610db3f010561c71506b1647 /src
parent: texture_cache: Implement siblings texture formats. (diff)
downloadyuzu-4db28f72f617b1500581e621719928fa0807d9ac.tar.gz
yuzu-4db28f72f617b1500581e621719928fa0807d9ac.tar.xz
yuzu-4db28f72f617b1500581e621719928fa0807d9ac.zip
texture_cache: Remove old rasterizer cache
Diffstat (limited to 'src')
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp1381
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h575
2 files changed, 0 insertions, 1956 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
deleted file mode 100644
index e27da1fa7..000000000
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ /dev/null
@@ -1,1381 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <optional>
7#include <glad/glad.h>
8
9#include "common/alignment.h"
10#include "common/assert.h"
11#include "common/logging/log.h"
12#include "common/microprofile.h"
13#include "common/scope_exit.h"
14#include "core/core.h"
15#include "core/hle/kernel/process.h"
16#include "core/settings.h"
17#include "video_core/engines/maxwell_3d.h"
18#include "video_core/memory_manager.h"
19#include "video_core/morton.h"
20#include "video_core/renderer_opengl/gl_rasterizer.h"
21#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
22#include "video_core/renderer_opengl/utils.h"
23#include "video_core/surface.h"
24#include "video_core/textures/convert.h"
25#include "video_core/textures/decoders.h"
26
27namespace OpenGL {
28
29using VideoCore::MortonSwizzle;
30using VideoCore::MortonSwizzleMode;
31using VideoCore::Surface::ComponentTypeFromDepthFormat;
32using VideoCore::Surface::ComponentTypeFromRenderTarget;
33using VideoCore::Surface::ComponentTypeFromTexture;
34using VideoCore::Surface::PixelFormatFromDepthFormat;
35using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
36using VideoCore::Surface::PixelFormatFromTextureFormat;
37using VideoCore::Surface::SurfaceTargetFromTextureType;
38
/// One row of the pixel-format table: the OpenGL formats used for a given guest pixel format.
39struct FormatTuple {
/// Internal format passed to the glTextureStorage* / glTextureBuffer calls.
40 GLint internal_format;
/// Pixel data format passed to the glTextureSubImage* / glGetTextureImage calls.
41 GLenum format;
/// Pixel data type passed alongside `format` in those calls.
42 GLenum type;
/// Component type this entry expects; GetFormatTuple asserts the caller's value matches it.
43 ComponentType component_type;
/// True for block-compressed formats, which take the compressed read-back/upload paths.
44 bool compressed;
45};
46
47static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
48 glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
49 glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
50 glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
51 glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
52 glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1);
53 if (max_mip_level == 1) {
54 glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0);
55 }
56}
57
58void SurfaceParams::InitCacheParameters(GPUVAddr gpu_addr_) {
59 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
60
61 gpu_addr = gpu_addr_;
62 host_ptr = memory_manager.GetPointer(gpu_addr_);
63 size_in_bytes = SizeInBytesRaw();
64
65 if (IsPixelFormatASTC(pixel_format)) {
66 // ASTC is uncompressed in software, in emulated as RGBA8
67 size_in_bytes_gl = width * height * depth * 4;
68 } else {
69 size_in_bytes_gl = SizeInBytesGL();
70 }
71}
72
73std::size_t SurfaceParams::InnerMipmapMemorySize(u32 mip_level, bool force_gl, bool layer_only,
74 bool uncompressed) const {
75 const u32 tile_x{GetDefaultBlockWidth(pixel_format)};
76 const u32 tile_y{GetDefaultBlockHeight(pixel_format)};
77 const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)};
78 u32 m_depth = (layer_only ? 1U : depth);
79 u32 m_width = MipWidth(mip_level);
80 u32 m_height = MipHeight(mip_level);
81 m_width = uncompressed ? m_width : std::max(1U, (m_width + tile_x - 1) / tile_x);
82 m_height = uncompressed ? m_height : std::max(1U, (m_height + tile_y - 1) / tile_y);
83 m_depth = std::max(1U, m_depth >> mip_level);
84 u32 m_block_height = MipBlockHeight(mip_level);
85 u32 m_block_depth = MipBlockDepth(mip_level);
86 return Tegra::Texture::CalculateSize(force_gl ? false : is_tiled, bytes_per_pixel, m_width,
87 m_height, m_depth, m_block_height, m_block_depth);
88}
89
90std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
91 bool uncompressed) const {
92 std::size_t block_size_bytes = Tegra::Texture::GetGOBSize() * block_height * block_depth;
93 std::size_t size = 0;
94 for (u32 i = 0; i < max_mip_level; i++) {
95 size += InnerMipmapMemorySize(i, force_gl, layer_only, uncompressed);
96 }
97 if (!force_gl && is_tiled) {
98 size = Common::AlignUp(size, block_size_bytes);
99 }
100 return size;
101}
102
103/*static*/ SurfaceParams SurfaceParams::CreateForTexture(
104 const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry) {
105 SurfaceParams params{};
106 params.is_tiled = config.tic.IsTiled();
107 params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0,
108 params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
109 params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0,
110 params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1;
111 params.srgb_conversion = config.tic.IsSrgbConversionEnabled();
112 params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
113 params.srgb_conversion);
114
115 if (config.tsc.depth_compare_enabled) {
116 // Some titles create a 'R16U' (normalized 16-bit) texture with depth_compare enabled,
117 // then attempt to sample from it via a shadow sampler. Convert format to Z16 (which also
118 // causes GetFormatType to properly return 'Depth' below).
119 if (GetFormatType(params.pixel_format) == SurfaceType::ColorTexture) {
120 switch (params.pixel_format) {
121 case PixelFormat::R16S:
122 case PixelFormat::R16U:
123 case PixelFormat::R16F:
124 params.pixel_format = PixelFormat::Z16;
125 break;
126 case PixelFormat::R32F:
127 params.pixel_format = PixelFormat::Z32F;
128 break;
129 default:
130 LOG_WARNING(HW_GPU, "Color texture format being used with depth compare: {}",
131 static_cast<u32>(params.pixel_format));
132 break;
133 }
134 }
135 }
136
137 params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
138 params.type = GetFormatType(params.pixel_format);
139 UNIMPLEMENTED_IF(params.type == SurfaceType::ColorTexture && config.tsc.depth_compare_enabled);
140
141 params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
142 params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
143 if (config.tic.IsLineal()) {
144 params.pitch = config.tic.Pitch();
145 }
146 params.unaligned_height = config.tic.Height();
147 params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
148 params.identity = SurfaceClass::Uploaded;
149
150 switch (params.target) {
151 case SurfaceTarget::Texture1D:
152 case SurfaceTarget::TextureBuffer:
153 case SurfaceTarget::Texture2D:
154 params.depth = 1;
155 break;
156 case SurfaceTarget::TextureCubemap:
157 params.depth = config.tic.Depth() * 6;
158 break;
159 case SurfaceTarget::Texture3D:
160 params.depth = config.tic.Depth();
161 break;
162 case SurfaceTarget::Texture2DArray:
163 params.depth = config.tic.Depth();
164 if (!entry.IsArray()) {
165 // TODO(bunnei): We have seen games re-use a Texture2D as Texture2DArray with depth of
166 // one, but sample the texture in the shader as if it were not an array texture. This
167 // probably is valid on hardware, but we still need to write a test to confirm this. In
168 // emulation, the workaround here is to continue to treat this as a Texture2D. An
169 // example game that does this is Super Mario Odyssey (in Cloud Kingdom).
170 ASSERT(params.depth == 1);
171 params.target = SurfaceTarget::Texture2D;
172 }
173 break;
174 case SurfaceTarget::TextureCubeArray:
175 params.depth = config.tic.Depth() * 6;
176 if (!entry.IsArray()) {
177 ASSERT(params.depth == 6);
178 params.target = SurfaceTarget::TextureCubemap;
179 }
180 break;
181 default:
182 LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target));
183 UNREACHABLE();
184 params.depth = 1;
185 break;
186 }
187
188 params.is_layered = SurfaceTargetIsLayered(params.target);
189 params.is_array = SurfaceTargetIsArray(params.target);
190 params.max_mip_level = config.tic.max_mip_level + 1;
191 params.rt = {};
192
193 params.InitCacheParameters(config.tic.Address());
194
195 return params;
196}
197
198/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(std::size_t index) {
199 const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]};
200 SurfaceParams params{};
201
202 params.is_tiled =
203 config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
204 params.block_width = 1 << config.memory_layout.block_width;
205 params.block_height = 1 << config.memory_layout.block_height;
206 params.block_depth = 1 << config.memory_layout.block_depth;
207 params.tile_width_spacing = 1;
208 params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
209 params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
210 config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
211 params.component_type = ComponentTypeFromRenderTarget(config.format);
212 params.type = GetFormatType(params.pixel_format);
213 if (params.is_tiled) {
214 params.width = config.width;
215 } else {
216 params.pitch = config.width;
217 const u32 bpp = params.GetFormatBpp() / 8;
218 params.width = params.pitch / bpp;
219 }
220 params.height = config.height;
221 params.unaligned_height = config.height;
222 params.target = SurfaceTarget::Texture2D;
223 params.identity = SurfaceClass::RenderTarget;
224 params.depth = 1;
225 params.max_mip_level = 1;
226 params.is_layered = false;
227
228 // Render target specific parameters, not used for caching
229 params.rt.index = static_cast<u32>(index);
230 params.rt.array_mode = config.array_mode;
231 params.rt.layer_stride = config.layer_stride;
232 params.rt.volume = config.volume;
233 params.rt.base_layer = config.base_layer;
234
235 params.InitCacheParameters(config.Address());
236
237 return params;
238}
239
240/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(
241 u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
242 u32 block_width, u32 block_height, u32 block_depth,
243 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
244 SurfaceParams params{};
245
246 params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
247 params.block_width = 1 << std::min(block_width, 5U);
248 params.block_height = 1 << std::min(block_height, 5U);
249 params.block_depth = 1 << std::min(block_depth, 5U);
250 params.tile_width_spacing = 1;
251 params.pixel_format = PixelFormatFromDepthFormat(format);
252 params.component_type = ComponentTypeFromDepthFormat(format);
253 params.type = GetFormatType(params.pixel_format);
254 params.srgb_conversion = false;
255 params.width = zeta_width;
256 params.height = zeta_height;
257 params.unaligned_height = zeta_height;
258 params.target = SurfaceTarget::Texture2D;
259 params.identity = SurfaceClass::DepthBuffer;
260 params.depth = 1;
261 params.max_mip_level = 1;
262 params.is_layered = false;
263 params.rt = {};
264
265 params.InitCacheParameters(zeta_address);
266
267 return params;
268}
269
270/*static*/ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
271 const Tegra::Engines::Fermi2D::Regs::Surface& config) {
272 SurfaceParams params{};
273
274 params.is_tiled = !config.linear;
275 params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0,
276 params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0,
277 params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0,
278 params.tile_width_spacing = 1;
279 params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
280 params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
281 config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
282 params.component_type = ComponentTypeFromRenderTarget(config.format);
283 params.type = GetFormatType(params.pixel_format);
284 params.width = config.width;
285 params.pitch = config.pitch;
286 params.height = config.height;
287 params.unaligned_height = config.height;
288 params.target = SurfaceTarget::Texture2D;
289 params.identity = SurfaceClass::Copy;
290 params.depth = 1;
291 params.max_mip_level = 1;
292 params.rt = {};
293
294 params.InitCacheParameters(config.Address());
295
296 return params;
297}
298
299static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
300 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
301 {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S
302 {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI
303 {GL_RGB8, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U
304 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
305 false}, // A2B10G10R10U
306 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U
307 {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U
308 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI
309 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F
310 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U
311 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI
312 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
313 false}, // R11FG11FB10F
314 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
315 {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
316 true}, // DXT1
317 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
318 true}, // DXT23
319 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
320 true}, // DXT45
321 {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1
322 {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
323 true}, // DXN2UNORM
324 {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM
325 {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
326 true}, // BC7U
327 {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
328 true}, // BC6H_UF16
329 {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
330 true}, // BC6H_SF16
331 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
332 {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
333 {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F
334 {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F
335 {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F
336 {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F
337 {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U
338 {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S
339 {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI
340 {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I
341 {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16
342 {GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F
343 {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI
344 {GL_RG16I, GL_RG_INTEGER, GL_SHORT, ComponentType::SInt, false}, // RG16I
345 {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S
346 {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F
347 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm,
348 false}, // RGBA8_SRGB
349 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U
350 {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
351 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
352 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
353 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
354 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5
355 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4
356 {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
357 // Compressed sRGB formats
358 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
359 true}, // DXT1_SRGB
360 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
361 true}, // DXT23_SRGB
362 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
363 true}, // DXT45_SRGB
364 {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
365 true}, // BC7U_SRGB
366 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB
367 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB
368 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB
369 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB
370 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5
371 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB
372 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8
373 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB
374
375 // Depth formats
376 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
377 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm,
378 false}, // Z16
379
380 // DepthStencil formats
381 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
382 false}, // Z24S8
383 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
384 false}, // S8Z24
385 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV,
386 ComponentType::Float, false}, // Z32FS8
387}};
388
389static GLenum SurfaceTargetToGL(SurfaceTarget target) {
390 switch (target) {
391 case SurfaceTarget::Texture1D:
392 return GL_TEXTURE_1D;
393 case SurfaceTarget::TextureBuffer:
394 return GL_TEXTURE_BUFFER;
395 case SurfaceTarget::Texture2D:
396 return GL_TEXTURE_2D;
397 case SurfaceTarget::Texture3D:
398 return GL_TEXTURE_3D;
399 case SurfaceTarget::Texture1DArray:
400 return GL_TEXTURE_1D_ARRAY;
401 case SurfaceTarget::Texture2DArray:
402 return GL_TEXTURE_2D_ARRAY;
403 case SurfaceTarget::TextureCubemap:
404 return GL_TEXTURE_CUBE_MAP;
405 case SurfaceTarget::TextureCubeArray:
406 return GL_TEXTURE_CUBE_MAP_ARRAY;
407 }
408 LOG_CRITICAL(Render_OpenGL, "Unimplemented texture target={}", static_cast<u32>(target));
409 UNREACHABLE();
410 return {};
411}
412
413static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
414 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
415 auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)];
416 ASSERT(component_type == format.component_type);
417
418 return format;
419}
420
421/// Returns the discrepant array target
422constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) {
423 switch (target) {
424 case SurfaceTarget::Texture1D:
425 return GL_TEXTURE_1D_ARRAY;
426 case SurfaceTarget::Texture2D:
427 return GL_TEXTURE_2D_ARRAY;
428 case SurfaceTarget::Texture3D:
429 return GL_NONE;
430 case SurfaceTarget::Texture1DArray:
431 return GL_TEXTURE_1D;
432 case SurfaceTarget::Texture2DArray:
433 return GL_TEXTURE_2D;
434 case SurfaceTarget::TextureCubemap:
435 return GL_TEXTURE_CUBE_MAP_ARRAY;
436 case SurfaceTarget::TextureCubeArray:
437 return GL_TEXTURE_CUBE_MAP;
438 }
439 return GL_NONE;
440}
441
442Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const {
443 u32 actual_height{std::max(1U, unaligned_height >> mip_level)};
444 if (IsPixelFormatASTC(pixel_format)) {
445 // ASTC formats must stop at the ATSC block size boundary
446 actual_height = Common::AlignDown(actual_height, GetASTCBlockSize(pixel_format).second);
447 }
448 return {0, actual_height, MipWidth(mip_level), 0};
449}
450
451void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
452 std::vector<u8>& gl_buffer, u32 mip_level) {
453 u32 depth = params.MipDepth(mip_level);
454 if (params.target == SurfaceTarget::Texture2D) {
455 // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented.
456 depth = 1U;
457 }
458 if (params.is_layered) {
459 u64 offset = params.GetMipmapLevelOffset(mip_level);
460 u64 offset_gl = 0;
461 const u64 layer_size = params.LayerMemorySize();
462 const u64 gl_size = params.LayerSizeGL(mip_level);
463 for (u32 i = 0; i < params.depth; i++) {
464 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
465 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
466 params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
467 gl_buffer.data() + offset_gl, params.host_ptr + offset);
468 offset += layer_size;
469 offset_gl += gl_size;
470 }
471 } else {
472 const u64 offset = params.GetMipmapLevelOffset(mip_level);
473 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
474 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
475 params.MipBlockDepth(mip_level), depth, params.tile_width_spacing,
476 gl_buffer.data(), params.host_ptr + offset);
477 }
478}
479
480void RasterizerCacheOpenGL::FastCopySurface(const Surface& src_surface,
481 const Surface& dst_surface) {
482 const auto& src_params{src_surface->GetSurfaceParams()};
483 const auto& dst_params{dst_surface->GetSurfaceParams()};
484
485 const u32 width{std::min(src_params.width, dst_params.width)};
486 const u32 height{std::min(src_params.height, dst_params.height)};
487
488 glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0,
489 0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0,
490 0, 0, width, height, 1);
491
492 dst_surface->MarkAsModified(true, *this);
493}
494
495MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64));
496void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface,
497 const GLuint copy_pbo_handle, const GLenum src_attachment,
498 const GLenum dst_attachment,
499 const std::size_t cubemap_face) {
500 MICROPROFILE_SCOPE(OpenGL_CopySurface);
501 ASSERT_MSG(dst_attachment == 0, "Unimplemented");
502
503 const auto& src_params{src_surface->GetSurfaceParams()};
504 const auto& dst_params{dst_surface->GetSurfaceParams()};
505
506 const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
507 const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);
508
509 const std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes);
510
511 glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
512 glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_COPY);
513 if (source_format.compressed) {
514 glGetCompressedTextureImage(src_surface->Texture().handle, src_attachment,
515 static_cast<GLsizei>(src_params.size_in_bytes), nullptr);
516 } else {
517 glGetTextureImage(src_surface->Texture().handle, src_attachment, source_format.format,
518 source_format.type, static_cast<GLsizei>(src_params.size_in_bytes),
519 nullptr);
520 }
521 // If the new texture is bigger than the previous one, we need to fill in the rest with data
522 // from the CPU.
523 if (src_params.size_in_bytes < dst_params.size_in_bytes) {
524 // Upload the rest of the memory.
525 if (dst_params.is_tiled) {
526 // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest
527 // of the data in this case. Games like Super Mario Odyssey seem to hit this case
528 // when drawing, it re-uses the memory of a previous texture as a bigger framebuffer
529 // but it doesn't clear it beforehand, the texture is already full of zeros.
530 LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
531 "reinterpretation but the texture is tiled.");
532 }
533 const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
534 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
535 glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
536 memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes));
537 }
538
539 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
540
541 const GLsizei width{static_cast<GLsizei>(
542 std::min(src_params.GetRect().GetWidth(), dst_params.GetRect().GetWidth()))};
543 const GLsizei height{static_cast<GLsizei>(
544 std::min(src_params.GetRect().GetHeight(), dst_params.GetRect().GetHeight()))};
545
546 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);
547 if (dest_format.compressed) {
548 LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!");
549 UNREACHABLE();
550 } else {
551 switch (dst_params.target) {
552 case SurfaceTarget::Texture1D:
553 glTextureSubImage1D(dst_surface->Texture().handle, 0, 0, width, dest_format.format,
554 dest_format.type, nullptr);
555 break;
556 case SurfaceTarget::Texture2D:
557 glTextureSubImage2D(dst_surface->Texture().handle, 0, 0, 0, width, height,
558 dest_format.format, dest_format.type, nullptr);
559 break;
560 case SurfaceTarget::Texture3D:
561 case SurfaceTarget::Texture2DArray:
562 case SurfaceTarget::TextureCubeArray:
563 glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, 0, width, height,
564 static_cast<GLsizei>(dst_params.depth), dest_format.format,
565 dest_format.type, nullptr);
566 break;
567 case SurfaceTarget::TextureCubemap:
568 glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0,
569 static_cast<GLint>(cubemap_face), width, height, 1,
570 dest_format.format, dest_format.type, nullptr);
571 break;
572 default:
573 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
574 static_cast<u32>(dst_params.target));
575 UNREACHABLE();
576 }
577 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
578 }
579
580 dst_surface->MarkAsModified(true, *this);
581}
582
583CachedSurface::CachedSurface(const SurfaceParams& params)
584 : RasterizerCacheObject{params.host_ptr}, params{params},
585 gl_target{SurfaceTargetToGL(params.target)}, cached_size_in_bytes{params.size_in_bytes} {
586
587 const auto optional_cpu_addr{
588 Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(params.gpu_addr)};
589 ASSERT_MSG(optional_cpu_addr, "optional_cpu_addr is invalid");
590 cpu_addr = *optional_cpu_addr;
591
592 texture.Create(gl_target);
593
594 // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
595 // alternatives. This signals a bug on those functions.
596 const auto width = static_cast<GLsizei>(params.MipWidth(0));
597 const auto height = static_cast<GLsizei>(params.MipHeight(0));
598 memory_size = params.MemorySize();
599 reinterpreted = false;
600
601 const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
602 gl_internal_format = format_tuple.internal_format;
603
604 switch (params.target) {
605 case SurfaceTarget::Texture1D:
606 glTextureStorage1D(texture.handle, params.max_mip_level, gl_internal_format, width);
607 break;
608 case SurfaceTarget::TextureBuffer:
609 texture_buffer.Create();
610 glNamedBufferStorage(texture_buffer.handle,
611 params.width * GetBytesPerPixel(params.pixel_format), nullptr,
612 GL_DYNAMIC_STORAGE_BIT);
613 glTextureBuffer(texture.handle, gl_internal_format, texture_buffer.handle);
614 break;
615 case SurfaceTarget::Texture2D:
616 case SurfaceTarget::TextureCubemap:
617 glTextureStorage2D(texture.handle, params.max_mip_level, gl_internal_format, width, height);
618 break;
619 case SurfaceTarget::Texture3D:
620 case SurfaceTarget::Texture2DArray:
621 case SurfaceTarget::TextureCubeArray:
622 glTextureStorage3D(texture.handle, params.max_mip_level, gl_internal_format, width, height,
623 params.depth);
624 break;
625 default:
626 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
627 static_cast<u32>(params.target));
628 UNREACHABLE();
629 glTextureStorage2D(texture.handle, params.max_mip_level, gl_internal_format, width, height);
630 }
631
632 if (params.target != SurfaceTarget::TextureBuffer) {
633 ApplyTextureDefaults(texture.handle, params.max_mip_level);
634 }
635
636 OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
637}
638
639MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
640void CachedSurface::LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
641 MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
642 auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
643 if (gl_buffer.size() < params.max_mip_level)
644 gl_buffer.resize(params.max_mip_level);
645 for (u32 i = 0; i < params.max_mip_level; i++)
646 gl_buffer[i].resize(params.GetMipmapSizeGL(i));
647 if (params.is_tiled) {
648 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
649 params.block_width, static_cast<u32>(params.target));
650 for (u32 i = 0; i < params.max_mip_level; i++)
651 SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i);
652 } else {
653 const u32 bpp = params.GetFormatBpp() / 8;
654 const u32 copy_size = (params.width * bpp + GetDefaultBlockWidth(params.pixel_format) - 1) /
655 GetDefaultBlockWidth(params.pixel_format);
656 if (params.pitch == copy_size) {
657 std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl);
658 } else {
659 const u32 height = (params.height + GetDefaultBlockHeight(params.pixel_format) - 1) /
660 GetDefaultBlockHeight(params.pixel_format);
661 const u8* start{params.host_ptr};
662 u8* write_to = gl_buffer[0].data();
663 for (u32 h = height; h > 0; h--) {
664 std::memcpy(write_to, start, copy_size);
665 start += params.pitch;
666 write_to += copy_size;
667 }
668 }
669 }
670 for (u32 i = 0; i < params.max_mip_level; i++) {
671 const u32 width = params.MipWidth(i);
672 const u32 height = params.MipHeight(i);
673 const u32 depth = params.MipDepth(i);
674 if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) {
675 // Reserve size for RGBA8 conversion
676 constexpr std::size_t rgba_bpp = 4;
677 gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp));
678 }
679 Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width,
680 height, depth, true, true);
681 }
682}
683
684MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
685void CachedSurface::FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) {
686 MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
687
688 ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented");
689
690 auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
691 // OpenGL temporary buffer needs to be big enough to store raw texture size
692 gl_buffer[0].resize(GetSizeInBytes());
693
694 const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
695 const u32 align = std::clamp(params.RowAlign(0), 1U, 8U);
696 glPixelStorei(GL_PACK_ALIGNMENT, align);
697 glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
698 ASSERT(!tuple.compressed);
699 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
700 glGetTextureImage(texture.handle, 0, tuple.format, tuple.type,
701 static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data());
702 glPixelStorei(GL_PACK_ROW_LENGTH, 0);
703 Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
704 params.height, params.depth, true, true);
705 if (params.is_tiled) {
706 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
707 params.block_width, static_cast<u32>(params.target));
708
709 SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0);
710 } else {
711 const u32 bpp = params.GetFormatBpp() / 8;
712 const u32 copy_size = params.width * bpp;
713 if (params.pitch == copy_size) {
714 std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes());
715 } else {
716 u8* start{params.host_ptr};
717 const u8* read_to = gl_buffer[0].data();
718 for (u32 h = params.height; h > 0; h--) {
719 std::memcpy(start, read_to, copy_size);
720 start += params.pitch;
721 read_to += copy_size;
722 }
723 }
724 }
725}
726
// Uploads a single mip level (mip_map) of this surface from the temporary buffer
// res_cache_tmp_mem.gl_buffer[mip_map] to the GL object backing it: texture.handle for every
// target except TextureBuffer, which writes into texture_buffer.handle instead.
// read_fb_handle and draw_fb_handle are not referenced anywhere in this body.
// NOTE(review): GL_UNPACK_ROW_LENGTH is reset to 0 before returning, GL_UNPACK_ALIGNMENT is left
// at the value set here.
727 void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
728 GLuint read_fb_handle, GLuint draw_fb_handle) {
729 const auto& rect{params.GetRect(mip_map)};
730
731 auto& gl_buffer = res_cache_tmp_mem.gl_buffer;
732
733 // Load data from memory to the surface
734 const auto x0 = static_cast<GLint>(rect.left);
735 const auto y0 = static_cast<GLint>(rect.bottom);
// Byte offset of texel (x0, y0) inside the level, using the level width as the row length.
736 auto buffer_offset =
737 static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) +
738 static_cast<std::size_t>(x0)) *
739 GetBytesPerPixel(params.pixel_format);
740
741 const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
742
// The row alignment reported for this level is clamped to the [1, 8] range before use.
743 const u32 align = std::clamp(params.RowAlign(mip_map), 1U, 8U);
744 glPixelStorei(GL_UNPACK_ALIGNMENT, align);
745 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));
746
747 const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
// Compressed formats: every upload starts at offset (0, 0) and covers the full level extent.
748 if (tuple.compressed) {
749 switch (params.target) {
750 case SurfaceTarget::Texture2D:
751 glCompressedTextureSubImage2D(
752 texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
753 static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format, image_size,
754 &gl_buffer[mip_map][buffer_offset]);
755 break;
756 case SurfaceTarget::Texture3D:
757 glCompressedTextureSubImage3D(
758 texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
759 static_cast<GLsizei>(params.MipHeight(mip_map)),
760 static_cast<GLsizei>(params.MipDepth(mip_map)), tuple.internal_format, image_size,
761 &gl_buffer[mip_map][buffer_offset]);
762 break;
// Array targets pass params.depth (not the per-level depth) as the depth argument.
763 case SurfaceTarget::Texture2DArray:
764 case SurfaceTarget::TextureCubeArray:
765 glCompressedTextureSubImage3D(
766 texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
767 static_cast<GLsizei>(params.MipHeight(mip_map)), static_cast<GLsizei>(params.depth),
768 tuple.internal_format, image_size, &gl_buffer[mip_map][buffer_offset]);
769 break;
// Cube maps are uploaded one face per call; the source offset advances by one layer each time.
770 case SurfaceTarget::TextureCubemap: {
771 const auto layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
772 for (std::size_t face = 0; face < params.depth; ++face) {
773 glCompressedTextureSubImage3D(
774 texture.handle, mip_map, 0, 0, static_cast<GLint>(face),
775 static_cast<GLsizei>(params.MipWidth(mip_map)),
776 static_cast<GLsizei>(params.MipHeight(mip_map)), 1, tuple.internal_format,
777 layer_size, &gl_buffer[mip_map][buffer_offset]);
778 buffer_offset += layer_size;
779 }
780 break;
781 }
// Any other target is logged and flagged as unreachable; a 2D upload sized with
// params.size_in_bytes_gl follows as the remaining statement of this case.
782 default:
783 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
784 static_cast<u32>(params.target));
785 UNREACHABLE();
786 glCompressedTextureSubImage2D(
787 texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
788 static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format,
789 static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[mip_map][buffer_offset]);
790 }
// Uncompressed formats: uploads start at (x0, y0) and cover the extent of rect.
791 } else {
792 switch (params.target) {
793 case SurfaceTarget::Texture1D:
794 glTextureSubImage1D(texture.handle, mip_map, x0, static_cast<GLsizei>(rect.GetWidth()),
795 tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
796 break;
// Texture buffers only have level 0 (asserted) and are written through the buffer object.
797 case SurfaceTarget::TextureBuffer:
798 ASSERT(mip_map == 0);
799 glNamedBufferSubData(texture_buffer.handle, x0,
800 static_cast<GLsizeiptr>(rect.GetWidth()) *
801 GetBytesPerPixel(params.pixel_format),
802 &gl_buffer[mip_map][buffer_offset]);
803 break;
804 case SurfaceTarget::Texture2D:
805 glTextureSubImage2D(texture.handle, mip_map, x0, y0,
806 static_cast<GLsizei>(rect.GetWidth()),
807 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
808 &gl_buffer[mip_map][buffer_offset]);
809 break;
810 case SurfaceTarget::Texture3D:
811 glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
812 static_cast<GLsizei>(rect.GetWidth()),
813 static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
814 tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
815 break;
816 case SurfaceTarget::Texture2DArray:
817 case SurfaceTarget::TextureCubeArray:
818 glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
819 static_cast<GLsizei>(rect.GetWidth()),
820 static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
821 tuple.type, &gl_buffer[mip_map][buffer_offset]);
822 break;
// As in the compressed path: one call per face, stepping the source by one layer size.
823 case SurfaceTarget::TextureCubemap: {
824 for (std::size_t face = 0; face < params.depth; ++face) {
825 glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
826 static_cast<GLsizei>(rect.GetWidth()),
827 static_cast<GLsizei>(rect.GetHeight()), 1, tuple.format,
828 tuple.type, &gl_buffer[mip_map][buffer_offset]);
829 buffer_offset += params.LayerSizeGL(mip_map);
830 }
831 break;
832 }
// Any other target is logged and flagged as unreachable; a plain 2D upload follows it.
833 default:
834 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
835 static_cast<u32>(params.target));
836 UNREACHABLE();
837 glTextureSubImage2D(texture.handle, mip_map, x0, y0,
838 static_cast<GLsizei>(rect.GetWidth()),
839 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
840 &gl_buffer[mip_map][buffer_offset]);
841 }
842 }
843
// Restore the default row length so later unpack operations are not affected by this upload.
844 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
845 }
846
847void CachedSurface::EnsureTextureDiscrepantView() {
848 if (discrepant_view.handle != 0)
849 return;
850
851 const GLenum target{GetArrayDiscrepantTarget(params.target)};
852 ASSERT(target != GL_NONE);
853
854 const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u};
855 constexpr GLuint min_layer = 0;
856 constexpr GLuint min_level = 0;
857
858 glGenTextures(1, &discrepant_view.handle);
859 glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level,
860 params.max_mip_level, min_layer, num_layers);
861 ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level);
862 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
863 reinterpret_cast<const GLint*>(swizzle.data()));
864}
865
866MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
867void CachedSurface::UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem,
868 GLuint read_fb_handle, GLuint draw_fb_handle) {
869 MICROPROFILE_SCOPE(OpenGL_TextureUL);
870
871 for (u32 i = 0; i < params.max_mip_level; i++)
872 UploadGLMipmapTexture(res_cache_tmp_mem, i, read_fb_handle, draw_fb_handle);
873}
874
875void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
876 Tegra::Texture::SwizzleSource swizzle_y,
877 Tegra::Texture::SwizzleSource swizzle_z,
878 Tegra::Texture::SwizzleSource swizzle_w) {
879 if (params.target == SurfaceTarget::TextureBuffer) {
880 return;
881 }
882 const GLenum new_x = MaxwellToGL::SwizzleSource(swizzle_x);
883 const GLenum new_y = MaxwellToGL::SwizzleSource(swizzle_y);
884 const GLenum new_z = MaxwellToGL::SwizzleSource(swizzle_z);
885 const GLenum new_w = MaxwellToGL::SwizzleSource(swizzle_w);
886 if (swizzle[0] == new_x && swizzle[1] == new_y && swizzle[2] == new_z && swizzle[3] == new_w) {
887 return;
888 }
889 swizzle = {new_x, new_y, new_z, new_w};
890 const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data());
891 glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
892 if (discrepant_view.handle != 0) {
893 glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
894 }
895}
896
897RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer)
898 : RasterizerCache{rasterizer} {
899 read_framebuffer.Create();
900 draw_framebuffer.Create();
901 copy_pbo.Create();
902}
903
904Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
905 const GLShader::SamplerEntry& entry) {
906 return GetSurface(SurfaceParams::CreateForTexture(config, entry));
907}
908
909Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
910 auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
911 const auto& regs{gpu.regs};
912
913 if (!gpu.dirty_flags.zeta_buffer) {
914 return last_depth_buffer;
915 }
916 gpu.dirty_flags.zeta_buffer = false;
917
918 if (!regs.zeta.Address() || !regs.zeta_enable) {
919 return last_depth_buffer = {};
920 }
921
922 SurfaceParams depth_params{SurfaceParams::CreateForDepthBuffer(
923 regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format,
924 regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
925 regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
926
927 return last_depth_buffer = GetSurface(depth_params, preserve_contents);
928}
929
930Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool preserve_contents) {
931 auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
932 const auto& regs{gpu.regs};
933
934 if (!gpu.dirty_flags.color_buffer[index]) {
935 return current_color_buffers[index];
936 }
937 gpu.dirty_flags.color_buffer.reset(index);
938
939 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
940
941 if (index >= regs.rt_control.count) {
942 return current_color_buffers[index] = {};
943 }
944
945 if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
946 return current_color_buffers[index] = {};
947 }
948
949 const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
950
951 return current_color_buffers[index] = GetSurface(color_params, preserve_contents);
952}
953
954void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
955 surface->LoadGLBuffer(temporal_memory);
956 surface->UploadGLTexture(temporal_memory, read_framebuffer.handle, draw_framebuffer.handle);
957 surface->MarkAsModified(false, *this);
958 surface->MarkForReload(false);
959}
960
961Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
962 if (!params.IsValid()) {
963 return {};
964 }
965
966 // Look up surface in the cache based on address
967 Surface surface{TryGet(params.host_ptr)};
968 if (surface) {
969 if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
970 // Use the cached surface as-is unless it's not synced with memory
971 if (surface->MustReload())
972 LoadSurface(surface);
973 return surface;
974 } else if (preserve_contents) {
975 // If surface parameters changed and we care about keeping the previous data, recreate
976 // the surface from the old one
977 Surface new_surface{RecreateSurface(surface, params)};
978 Unregister(surface);
979 Register(new_surface);
980 if (new_surface->IsUploaded()) {
981 RegisterReinterpretSurface(new_surface);
982 }
983 return new_surface;
984 } else {
985 // Delete the old surface before creating a new one to prevent collisions.
986 Unregister(surface);
987 }
988 }
989
990 // No cached surface found - get a new one
991 surface = GetUncachedSurface(params);
992 Register(surface);
993
994 // Only load surface from memory if we care about the contents
995 if (preserve_contents) {
996 LoadSurface(surface);
997 }
998
999 return surface;
1000}
1001
1002Surface RasterizerCacheOpenGL::GetUncachedSurface(const SurfaceParams& params) {
1003 Surface surface{TryGetReservedSurface(params)};
1004 if (!surface) {
1005 // No reserved surface available, create a new one and reserve it
1006 surface = std::make_shared<CachedSurface>(params);
1007 ReserveSurface(surface);
1008 }
1009 return surface;
1010}
1011
1012void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
1013 const Surface& dst_surface) {
1014 const auto& init_params{src_surface->GetSurfaceParams()};
1015 const auto& dst_params{dst_surface->GetSurfaceParams()};
1016 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
1017 GPUVAddr address{init_params.gpu_addr};
1018 const std::size_t layer_size{dst_params.LayerMemorySize()};
1019 for (u32 layer = 0; layer < dst_params.depth; layer++) {
1020 for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
1021 const GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
1022 const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))};
1023 if (!copy) {
1024 continue;
1025 }
1026 const auto& src_params{copy->GetSurfaceParams()};
1027 const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))};
1028 const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))};
1029
1030 glCopyImageSubData(copy->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0,
1031 0, 0, dst_surface->Texture().handle,
1032 SurfaceTargetToGL(dst_params.target), mipmap, 0, 0, layer, width,
1033 height, 1);
1034 }
1035 address += layer_size;
1036 }
1037
1038 dst_surface->MarkAsModified(true, *this);
1039}
1040
// Blits src_rect of src_surface onto dst_rect of dst_surface through the given read/draw
// framebuffer objects. The attachment layout is chosen from the SOURCE surface type (color, depth
// or depth-stencil); for color blits the texture is attached according to each surface's target,
// always at level 0 and, for layered/3D targets, layer/slice 0.
// The GL state that was current on entry is re-applied when the function exits.
// The function returns true unconditionally.
// NOTE(review): when the source type matches none of the three branches, `buffers` stays 0 and
// glBlitFramebuffer is still called with an empty mask.
1041 static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface,
1042 const Common::Rectangle<u32>& src_rect,
1043 const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle,
1044 GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0,
1045 std::size_t cubemap_face = 0) {
1046
1047 const auto& src_params{src_surface->GetSurfaceParams()};
1048 const auto& dst_params{dst_surface->GetSurfaceParams()};
1049
// Snapshot the current state and re-apply it on every exit path.
1050 OpenGLState prev_state{OpenGLState::GetCurState()};
1051 SCOPE_EXIT({ prev_state.Apply(); });
1052
// Apply a fresh state object that only sets the read and draw framebuffers.
1053 OpenGLState state;
1054 state.draw.read_framebuffer = read_fb_handle;
1055 state.draw.draw_framebuffer = draw_fb_handle;
1056 state.Apply();
1057
// Bit mask handed to glBlitFramebuffer; filled in by the branch matching the source type.
1058 u32 buffers{};
1059
1060 if (src_params.type == SurfaceType::ColorTexture) {
// Read side: attach the source as a color attachment and detach depth-stencil.
1061 switch (src_params.target) {
1062 case SurfaceTarget::Texture2D:
1063 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1064 GL_TEXTURE_2D, src_surface->Texture().handle, 0);
1065 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1066 0, 0);
1067 break;
// Cube maps attach the single face selected by cubemap_face.
1068 case SurfaceTarget::TextureCubemap:
1069 glFramebufferTexture2D(
1070 GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1071 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
1072 src_surface->Texture().handle, 0);
1073 glFramebufferTexture2D(
1074 GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
1075 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
1076 break;
1077 case SurfaceTarget::Texture2DArray:
1078 glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1079 src_surface->Texture().handle, 0, 0);
1080 glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
1081 break;
1082 case SurfaceTarget::Texture3D:
1083 glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1084 SurfaceTargetToGL(src_params.target),
1085 src_surface->Texture().handle, 0, 0);
1086 glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
1087 SurfaceTargetToGL(src_params.target), 0, 0, 0);
1088 break;
// Every other target is attached as if it were a 2D texture.
1089 default:
1090 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1091 GL_TEXTURE_2D, src_surface->Texture().handle, 0);
1092 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1093 0, 0);
1094 break;
1095 }
1096
// Draw side: same scheme as above, driven by the destination target.
1097 switch (dst_params.target) {
1098 case SurfaceTarget::Texture2D:
1099 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1100 GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
1101 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1102 0, 0);
1103 break;
1104 case SurfaceTarget::TextureCubemap:
1105 glFramebufferTexture2D(
1106 GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1107 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face),
1108 dst_surface->Texture().handle, 0);
1109 glFramebufferTexture2D(
1110 GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
1111 static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0);
1112 break;
1113 case SurfaceTarget::Texture2DArray:
1114 glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1115 dst_surface->Texture().handle, 0, 0);
1116 glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0);
1117 break;
1118
1119 case SurfaceTarget::Texture3D:
1120 glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1121 SurfaceTargetToGL(dst_params.target),
1122 dst_surface->Texture().handle, 0, 0);
1123 glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
1124 SurfaceTargetToGL(dst_params.target), 0, 0, 0);
1125 break;
1126 default:
1127 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1128 GL_TEXTURE_2D, dst_surface->Texture().handle, 0);
1129 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1130 0, 0);
1131 break;
1132 }
1133
1134 buffers = GL_COLOR_BUFFER_BIT;
// Depth-only: detach color and stencil, attach both textures as 2D depth attachments.
1135 } else if (src_params.type == SurfaceType::Depth) {
1136 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1137 GL_TEXTURE_2D, 0, 0);
1138 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
1139 src_surface->Texture().handle, 0);
1140 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
1141
1142 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1143 GL_TEXTURE_2D, 0, 0);
1144 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
1145 dst_surface->Texture().handle, 0);
1146 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
1147
1148 buffers = GL_DEPTH_BUFFER_BIT;
// Depth-stencil: detach color, attach both textures to the combined depth-stencil point.
1149 } else if (src_params.type == SurfaceType::DepthStencil) {
1150 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment,
1151 GL_TEXTURE_2D, 0, 0);
1152 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1153 src_surface->Texture().handle, 0);
1154
1155 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment,
1156 GL_TEXTURE_2D, 0, 0);
1157 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
1158 dst_surface->Texture().handle, 0);
1159
1160 buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
1161 }
1162
// Color blits are filtered linearly; any other mask uses nearest filtering.
1163 glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
1164 dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
1165 buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
1166
1167 return true;
1168 }
1169
1170void RasterizerCacheOpenGL::FermiCopySurface(
1171 const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
1172 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
1173 const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) {
1174
1175 const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
1176 const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
1177
1178 ASSERT(src_params.pixel_format == dst_params.pixel_format);
1179 ASSERT(src_params.block_height == dst_params.block_height);
1180 ASSERT(src_params.is_tiled == dst_params.is_tiled);
1181 ASSERT(src_params.depth == dst_params.depth);
1182 ASSERT(src_params.target == dst_params.target);
1183 ASSERT(src_params.rt.index == dst_params.rt.index);
1184
1185 auto src_surface = GetSurface(src_params, true);
1186 auto dst_surface = GetSurface(dst_params, true);
1187
1188 BlitSurface(src_surface, dst_surface, src_rect, dst_rect, read_framebuffer.handle,
1189 draw_framebuffer.handle);
1190
1191 dst_surface->MarkAsModified(true, *this);
1192}
1193
1194void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
1195 const Surface& dst_surface) {
1196 const auto& src_params{src_surface->GetSurfaceParams()};
1197 const auto& dst_params{dst_surface->GetSurfaceParams()};
1198
1199 // Flush enough memory for both the source and destination surface
1200 FlushRegion(ToCacheAddr(src_params.host_ptr),
1201 std::max(src_params.MemorySize(), dst_params.MemorySize()));
1202
1203 LoadSurface(dst_surface);
1204}
1205
1206Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
1207 const SurfaceParams& new_params) {
1208 // Verify surface is compatible for blitting
1209 auto old_params{old_surface->GetSurfaceParams()};
1210
1211 // Get a new surface with the new parameters, and blit the previous surface to it
1212 Surface new_surface{GetUncachedSurface(new_params)};
1213
1214 // With use_accurate_gpu_emulation enabled, do an accurate surface copy
1215 if (Settings::values.use_accurate_gpu_emulation) {
1216 AccurateCopySurface(old_surface, new_surface);
1217 return new_surface;
1218 }
1219
1220 const bool old_compressed =
1221 GetFormatTuple(old_params.pixel_format, old_params.component_type).compressed;
1222 const bool new_compressed =
1223 GetFormatTuple(new_params.pixel_format, new_params.component_type).compressed;
1224 const bool compatible_formats =
1225 GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format) &&
1226 !(old_compressed || new_compressed);
1227 // For compatible surfaces, we can just do fast glCopyImageSubData based copy
1228 if (old_params.target == new_params.target && old_params.depth == new_params.depth &&
1229 old_params.depth == 1 && compatible_formats) {
1230 FastCopySurface(old_surface, new_surface);
1231 return new_surface;
1232 }
1233
1234 switch (new_params.target) {
1235 case SurfaceTarget::Texture2D:
1236 CopySurface(old_surface, new_surface, copy_pbo.handle);
1237 break;
1238 case SurfaceTarget::Texture3D:
1239 AccurateCopySurface(old_surface, new_surface);
1240 break;
1241 case SurfaceTarget::TextureCubemap:
1242 case SurfaceTarget::Texture2DArray:
1243 case SurfaceTarget::TextureCubeArray:
1244 if (compatible_formats)
1245 FastLayeredCopySurface(old_surface, new_surface);
1246 else {
1247 AccurateCopySurface(old_surface, new_surface);
1248 }
1249 break;
1250 default:
1251 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
1252 static_cast<u32>(new_params.target));
1253 UNREACHABLE();
1254 }
1255
1256 return new_surface;
1257}
1258
1259Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const {
1260 return TryGet(host_ptr);
1261}
1262
1263void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
1264 const auto& surface_reserve_key{SurfaceReserveKey::Create(surface->GetSurfaceParams())};
1265 surface_reserve[surface_reserve_key] = surface;
1266}
1267
1268Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params) {
1269 const auto& surface_reserve_key{SurfaceReserveKey::Create(params)};
1270 auto search{surface_reserve.find(surface_reserve_key)};
1271 if (search != surface_reserve.end()) {
1272 return search->second;
1273 }
1274 return {};
1275}
1276
1277static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params,
1278 u32 height) {
1279 for (u32 i = 0; i < params.max_mip_level; i++) {
1280 if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) {
1281 return {i};
1282 }
1283 }
1284 return {};
1285}
1286
1287static std::optional<u32> TryFindBestLayer(GPUVAddr addr, const SurfaceParams params, u32 mipmap) {
1288 const std::size_t size{params.LayerMemorySize()};
1289 GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
1290 for (u32 i = 0; i < params.depth; i++) {
1291 if (start == addr) {
1292 return {i};
1293 }
1294 start += size;
1295 }
1296 return {};
1297}
1298
1299static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface,
1300 const Surface blitted_surface) {
1301 const auto& dst_params = blitted_surface->GetSurfaceParams();
1302 const auto& src_params = render_surface->GetSurfaceParams();
1303 const std::size_t src_memory_size = src_params.size_in_bytes;
1304 const std::optional<u32> level =
1305 TryFindBestMipMap(src_memory_size, dst_params, src_params.height);
1306 if (level.has_value()) {
1307 if (src_params.width == dst_params.MipWidthGobAligned(*level) &&
1308 src_params.height == dst_params.MipHeight(*level) &&
1309 src_params.block_height >= dst_params.MipBlockHeight(*level)) {
1310 const std::optional<u32> slot =
1311 TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level);
1312 if (slot.has_value()) {
1313 glCopyImageSubData(render_surface->Texture().handle,
1314 SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
1315 blitted_surface->Texture().handle,
1316 SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot,
1317 dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1);
1318 blitted_surface->MarkAsModified(true, cache);
1319 return true;
1320 }
1321 }
1322 }
1323 return false;
1324}
1325
1326static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
1327 const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize();
1328 const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize();
1329 if (bound2 > bound1)
1330 return true;
1331 const auto& dst_params = blitted_surface->GetSurfaceParams();
1332 const auto& src_params = render_surface->GetSurfaceParams();
1333 return (dst_params.component_type != src_params.component_type);
1334}
1335
1336static bool IsReinterpretInvalidSecond(const Surface render_surface,
1337 const Surface blitted_surface) {
1338 const auto& dst_params = blitted_surface->GetSurfaceParams();
1339 const auto& src_params = render_surface->GetSurfaceParams();
1340 return (dst_params.height > src_params.height && dst_params.width > src_params.width);
1341}
1342
1343bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface,
1344 Surface intersect) {
1345 if (IsReinterpretInvalid(triggering_surface, intersect)) {
1346 Unregister(intersect);
1347 return false;
1348 }
1349 if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) {
1350 if (IsReinterpretInvalidSecond(triggering_surface, intersect)) {
1351 Unregister(intersect);
1352 return false;
1353 }
1354 FlushObject(intersect);
1355 FlushObject(triggering_surface);
1356 intersect->MarkForReload(true);
1357 }
1358 return true;
1359}
1360
1361void RasterizerCacheOpenGL::SignalPreDrawCall() {
1362 if (texception && GLAD_GL_ARB_texture_barrier) {
1363 glTextureBarrier();
1364 }
1365 texception = false;
1366}
1367
1368void RasterizerCacheOpenGL::SignalPostDrawCall() {
1369 for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
1370 if (current_color_buffers[i] != nullptr) {
1371 Surface intersect =
1372 CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr());
1373 if (intersect != nullptr) {
1374 PartialReinterpretSurface(current_color_buffers[i], intersect);
1375 texception = true;
1376 }
1377 }
1378 }
1379}
1380
1381} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
deleted file mode 100644
index bbab79575..000000000
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ /dev/null
@@ -1,575 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <memory>
9#include <string>
10#include <tuple>
11#include <vector>
12
13#include "common/alignment.h"
14#include "common/bit_util.h"
15#include "common/common_types.h"
16#include "common/hash.h"
17#include "common/math_util.h"
18#include "video_core/engines/fermi_2d.h"
19#include "video_core/engines/maxwell_3d.h"
20#include "video_core/rasterizer_cache.h"
21#include "video_core/renderer_opengl/gl_resource_manager.h"
22#include "video_core/renderer_opengl/gl_shader_gen.h"
23#include "video_core/surface.h"
24#include "video_core/textures/decoders.h"
25#include "video_core/textures/texture.h"
26
27namespace OpenGL {
28
29class CachedSurface;
30using Surface = std::shared_ptr<CachedSurface>;
31using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
32
33using SurfaceTarget = VideoCore::Surface::SurfaceTarget;
34using SurfaceType = VideoCore::Surface::SurfaceType;
35using PixelFormat = VideoCore::Surface::PixelFormat;
36using ComponentType = VideoCore::Surface::ComponentType;
37using Maxwell = Tegra::Engines::Maxwell3D::Regs;
38
39struct SurfaceParams {
40 enum class SurfaceClass {
41 Uploaded,
42 RenderTarget,
43 DepthBuffer,
44 Copy,
45 };
46
47 static std::string SurfaceTargetName(SurfaceTarget target) {
48 switch (target) {
49 case SurfaceTarget::Texture1D:
50 return "Texture1D";
51 case SurfaceTarget::Texture2D:
52 return "Texture2D";
53 case SurfaceTarget::Texture3D:
54 return "Texture3D";
55 case SurfaceTarget::Texture1DArray:
56 return "Texture1DArray";
57 case SurfaceTarget::Texture2DArray:
58 return "Texture2DArray";
59 case SurfaceTarget::TextureCubemap:
60 return "TextureCubemap";
61 case SurfaceTarget::TextureCubeArray:
62 return "TextureCubeArray";
63 default:
64 LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
65 UNREACHABLE();
66 return fmt::format("TextureUnknown({})", static_cast<u32>(target));
67 }
68 }
69
70 u32 GetFormatBpp() const {
71 return VideoCore::Surface::GetFormatBpp(pixel_format);
72 }
73
74 /// Returns the rectangle corresponding to this surface
75 Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;
76
77 /// Returns the total size of this surface in bytes, adjusted for compression
78 std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
79 const u32 compression_factor{GetCompressionFactor(pixel_format)};
80 const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)};
81 const size_t uncompressed_size{
82 Tegra::Texture::CalculateSize((ignore_tiled ? false : is_tiled), bytes_per_pixel, width,
83 height, depth, block_height, block_depth)};
84
85 // Divide by compression_factor^2, as height and width are factored by this
86 return uncompressed_size / (compression_factor * compression_factor);
87 }
88
89 /// Returns the size of this surface as an OpenGL texture in bytes
90 std::size_t SizeInBytesGL() const {
91 return SizeInBytesRaw(true);
92 }
93
94 /// Returns the size of this surface as a cube face in bytes
95 std::size_t SizeInBytesCubeFace() const {
96 return size_in_bytes / 6;
97 }
98
99 /// Returns the size of this surface as an OpenGL cube face in bytes
100 std::size_t SizeInBytesCubeFaceGL() const {
101 return size_in_bytes_gl / 6;
102 }
103
104 /// Returns the exact size of memory occupied by the texture in VRAM, including mipmaps.
105 std::size_t MemorySize() const {
106 std::size_t size = InnerMemorySize(false, is_layered);
107 if (is_layered)
108 return size * depth;
109 return size;
110 }
111
112 /// Returns true if the parameters constitute a valid rasterizer surface.
113 bool IsValid() const {
114 return gpu_addr && host_ptr && height && width;
115 }
116
117 /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including
118 /// mipmaps.
119 std::size_t LayerMemorySize() const {
120 return InnerMemorySize(false, true);
121 }
122
123 /// Returns the size of a layer of this surface in OpenGL.
124 std::size_t LayerSizeGL(u32 mip_level) const {
125 return InnerMipmapMemorySize(mip_level, true, is_layered, false);
126 }
127
128 std::size_t GetMipmapSizeGL(u32 mip_level, bool ignore_compressed = true) const {
129 std::size_t size = InnerMipmapMemorySize(mip_level, true, is_layered, ignore_compressed);
130 if (is_layered)
131 return size * depth;
132 return size;
133 }
134
135 std::size_t GetMipmapLevelOffset(u32 mip_level) const {
136 std::size_t offset = 0;
137 for (u32 i = 0; i < mip_level; i++)
138 offset += InnerMipmapMemorySize(i, false, is_layered);
139 return offset;
140 }
141
142 std::size_t GetMipmapLevelOffsetGL(u32 mip_level) const {
143 std::size_t offset = 0;
144 for (u32 i = 0; i < mip_level; i++)
145 offset += InnerMipmapMemorySize(i, true, is_layered);
146 return offset;
147 }
148
149 std::size_t GetMipmapSingleSize(u32 mip_level) const {
150 return InnerMipmapMemorySize(mip_level, false, is_layered);
151 }
152
153 u32 MipWidth(u32 mip_level) const {
154 return std::max(1U, width >> mip_level);
155 }
156
157 u32 MipWidthGobAligned(u32 mip_level) const {
158 return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
159 }
160
161 u32 MipHeight(u32 mip_level) const {
162 return std::max(1U, height >> mip_level);
163 }
164
165 u32 MipDepth(u32 mip_level) const {
166 return is_layered ? depth : std::max(1U, depth >> mip_level);
167 }
168
169 // Auto block resizing algorithm from:
170 // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
171 u32 MipBlockHeight(u32 mip_level) const {
172 if (mip_level == 0)
173 return block_height;
174 u32 alt_height = MipHeight(mip_level);
175 u32 h = GetDefaultBlockHeight(pixel_format);
176 u32 blocks_in_y = (alt_height + h - 1) / h;
177 u32 bh = 16;
178 while (bh > 1 && blocks_in_y <= bh * 4) {
179 bh >>= 1;
180 }
181 return bh;
182 }
183
184 u32 MipBlockDepth(u32 mip_level) const {
185 if (mip_level == 0) {
186 return block_depth;
187 }
188
189 if (is_layered) {
190 return 1;
191 }
192
193 const u32 mip_depth = MipDepth(mip_level);
194 u32 bd = 32;
195 while (bd > 1 && mip_depth * 2 <= bd) {
196 bd >>= 1;
197 }
198
199 if (bd == 32) {
200 const u32 bh = MipBlockHeight(mip_level);
201 if (bh >= 4) {
202 return 16;
203 }
204 }
205
206 return bd;
207 }
208
209 u32 RowAlign(u32 mip_level) const {
210 const u32 m_width = MipWidth(mip_level);
211 const u32 bytes_per_pixel = GetBytesPerPixel(pixel_format);
212 const u32 l2 = Common::CountTrailingZeroes32(m_width * bytes_per_pixel);
213 return (1U << l2);
214 }
215
216 /// Creates SurfaceParams from a texture configuration
217 static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
218 const GLShader::SamplerEntry& entry);
219
220 /// Creates SurfaceParams from a framebuffer configuration
221 static SurfaceParams CreateForFramebuffer(std::size_t index);
222
223 /// Creates SurfaceParams for a depth buffer configuration
224 static SurfaceParams CreateForDepthBuffer(
225 u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
226 u32 block_width, u32 block_height, u32 block_depth,
227 Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
228
229 /// Creates SurfaceParams for a Fermi2D surface copy
230 static SurfaceParams CreateForFermiCopySurface(
231 const Tegra::Engines::Fermi2D::Regs::Surface& config);
232
233 /// Checks if surfaces are compatible for caching
234 bool IsCompatibleSurface(const SurfaceParams& other) const {
235 if (std::tie(pixel_format, type, width, height, target, depth, is_tiled) ==
236 std::tie(other.pixel_format, other.type, other.width, other.height, other.target,
237 other.depth, other.is_tiled)) {
238 if (!is_tiled)
239 return true;
240 return std::tie(block_height, block_depth, tile_width_spacing) ==
241 std::tie(other.block_height, other.block_depth, other.tile_width_spacing);
242 }
243 return false;
244 }
245
246 /// Initializes parameters for caching, should be called after everything has been initialized
247 void InitCacheParameters(GPUVAddr gpu_addr);
248
249 std::string TargetName() const {
250 switch (target) {
251 case SurfaceTarget::Texture1D:
252 return "1D";
253 case SurfaceTarget::TextureBuffer:
254 return "Buffer";
255 case SurfaceTarget::Texture2D:
256 return "2D";
257 case SurfaceTarget::Texture3D:
258 return "3D";
259 case SurfaceTarget::Texture1DArray:
260 return "1DArray";
261 case SurfaceTarget::Texture2DArray:
262 return "2DArray";
263 case SurfaceTarget::TextureCubemap:
264 return "Cube";
265 default:
266 LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
267 UNREACHABLE();
268 return fmt::format("TUK({})", static_cast<u32>(target));
269 }
270 }
271
272 std::string ClassName() const {
273 switch (identity) {
274 case SurfaceClass::Uploaded:
275 return "UP";
276 case SurfaceClass::RenderTarget:
277 return "RT";
278 case SurfaceClass::DepthBuffer:
279 return "DB";
280 case SurfaceClass::Copy:
281 return "CP";
282 default:
283 LOG_CRITICAL(HW_GPU, "Unimplemented surface_class={}", static_cast<u32>(identity));
284 UNREACHABLE();
285 return fmt::format("CUK({})", static_cast<u32>(identity));
286 }
287 }
288
289 std::string IdentityString() const {
290 return ClassName() + '_' + TargetName() + '_' + (is_tiled ? 'T' : 'L');
291 }
292
293 bool is_tiled;
294 u32 block_width;
295 u32 block_height;
296 u32 block_depth;
297 u32 tile_width_spacing;
298 PixelFormat pixel_format;
299 ComponentType component_type;
300 SurfaceType type;
301 u32 width;
302 u32 height;
303 u32 depth;
304 u32 unaligned_height;
305 u32 pitch;
306 SurfaceTarget target;
307 SurfaceClass identity;
308 u32 max_mip_level;
309 bool is_layered;
310 bool is_array;
311 bool srgb_conversion;
312 // Parameters used for caching
313 u8* host_ptr;
314 GPUVAddr gpu_addr;
315 std::size_t size_in_bytes;
316 std::size_t size_in_bytes_gl;
317
318 // Render target specific parameters, not used in caching
319 struct {
320 u32 index;
321 u32 array_mode;
322 u32 volume;
323 u32 layer_stride;
324 u32 base_layer;
325 } rt;
326
327private:
328 std::size_t InnerMipmapMemorySize(u32 mip_level, bool force_gl = false, bool layer_only = false,
329 bool uncompressed = false) const;
330 std::size_t InnerMemorySize(bool force_gl = false, bool layer_only = false,
331 bool uncompressed = false) const;
332};
333
334}; // namespace OpenGL
335
336/// Hashable variation of SurfaceParams, used for a key in the surface cache
337struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
338 static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) {
339 SurfaceReserveKey res;
340 res.state = params;
341 res.state.identity = {}; // Ignore the origin of the texture
342 res.state.gpu_addr = {}; // Ignore GPU vaddr in caching
343 res.state.rt = {}; // Ignore rt config in caching
344 return res;
345 }
346};
347namespace std {
348template <>
349struct hash<SurfaceReserveKey> {
350 std::size_t operator()(const SurfaceReserveKey& k) const {
351 return k.Hash();
352 }
353};
354} // namespace std
355
356namespace OpenGL {
357
358class RasterizerOpenGL;
359
360// This is used to store temporary big buffers,
361// instead of creating/destroying all the time
362struct RasterizerTemporaryMemory {
363 std::vector<std::vector<u8>> gl_buffer;
364};
365
366class CachedSurface final : public RasterizerCacheObject {
367public:
368 explicit CachedSurface(const SurfaceParams& params);
369
370 VAddr GetCpuAddr() const override {
371 return cpu_addr;
372 }
373
374 std::size_t GetSizeInBytes() const override {
375 return cached_size_in_bytes;
376 }
377
378 std::size_t GetMemorySize() const {
379 return memory_size;
380 }
381
382 const OGLTexture& Texture() const {
383 return texture;
384 }
385
386 const OGLTexture& Texture(bool as_array) {
387 if (params.is_array == as_array) {
388 return texture;
389 } else {
390 EnsureTextureDiscrepantView();
391 return discrepant_view;
392 }
393 }
394
395 GLenum Target() const {
396 return gl_target;
397 }
398
399 const SurfaceParams& GetSurfaceParams() const {
400 return params;
401 }
402
403 // Read/Write data in Switch memory to/from gl_buffer
404 void LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
405 void FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
406
407 // Upload data in gl_buffer to this surface's texture
408 void UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, GLuint read_fb_handle,
409 GLuint draw_fb_handle);
410
411 void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
412 Tegra::Texture::SwizzleSource swizzle_y,
413 Tegra::Texture::SwizzleSource swizzle_z,
414 Tegra::Texture::SwizzleSource swizzle_w);
415
416 void MarkReinterpreted() {
417 reinterpreted = true;
418 }
419
420 bool IsReinterpreted() const {
421 return reinterpreted;
422 }
423
424 void MarkForReload(bool reload) {
425 must_reload = reload;
426 }
427
428 bool MustReload() const {
429 return must_reload;
430 }
431
432 bool IsUploaded() const {
433 return params.identity == SurfaceParams::SurfaceClass::Uploaded;
434 }
435
436private:
437 void UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
438 GLuint read_fb_handle, GLuint draw_fb_handle);
439
440 void EnsureTextureDiscrepantView();
441
442 OGLTexture texture;
443 OGLTexture discrepant_view;
444 OGLBuffer texture_buffer;
445 SurfaceParams params{};
446 GLenum gl_target{};
447 GLenum gl_internal_format{};
448 std::size_t cached_size_in_bytes{};
449 std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
450 std::size_t memory_size;
451 bool reinterpreted = false;
452 bool must_reload = false;
453 VAddr cpu_addr{};
454};
455
456class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
457public:
458 explicit RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer);
459
460 /// Get a surface based on the texture configuration
461 Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
462 const GLShader::SamplerEntry& entry);
463
464 /// Get the depth surface based on the framebuffer configuration
465 Surface GetDepthBufferSurface(bool preserve_contents);
466
467 /// Get the color surface based on the framebuffer configuration and the specified render target
468 Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
469
470 /// Tries to find a framebuffer using on the provided CPU address
471 Surface TryFindFramebufferSurface(const u8* host_ptr) const;
472
473 /// Copies the contents of one surface to another
474 void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
475 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
476 const Common::Rectangle<u32>& src_rect,
477 const Common::Rectangle<u32>& dst_rect);
478
479 void SignalPreDrawCall();
480 void SignalPostDrawCall();
481
482protected:
483 void FlushObjectInner(const Surface& object) override {
484 object->FlushGLBuffer(temporal_memory);
485 }
486
487private:
488 void LoadSurface(const Surface& surface);
489 Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true);
490
491 /// Gets an uncached surface, creating it if need be
492 Surface GetUncachedSurface(const SurfaceParams& params);
493
494 /// Recreates a surface with new parameters
495 Surface RecreateSurface(const Surface& old_surface, const SurfaceParams& new_params);
496
497 /// Reserves a unique surface that can be reused later
498 void ReserveSurface(const Surface& surface);
499
500 /// Tries to get a reserved surface for the specified parameters
501 Surface TryGetReservedSurface(const SurfaceParams& params);
502
503 // Partialy reinterpret a surface based on a triggering_surface that collides with it.
504 // returns true if the reinterpret was successful, false in case it was not.
505 bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);
506
507 /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
508 void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
509 void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
510 void FastCopySurface(const Surface& src_surface, const Surface& dst_surface);
511 void CopySurface(const Surface& src_surface, const Surface& dst_surface,
512 const GLuint copy_pbo_handle, const GLenum src_attachment = 0,
513 const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0);
514
515 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
516 /// previously been used. This is to prevent surfaces from being constantly created and
517 /// destroyed when used with different surface parameters.
518 std::unordered_map<SurfaceReserveKey, Surface> surface_reserve;
519
520 OGLFramebuffer read_framebuffer;
521 OGLFramebuffer draw_framebuffer;
522
523 bool texception = false;
524
525 /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
526 /// using the new format.
527 OGLBuffer copy_pbo;
528
529 std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
530 std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
531 Surface last_depth_buffer;
532
533 RasterizerTemporaryMemory temporal_memory;
534
535 using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
536 using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
537
538 static auto GetReinterpretInterval(const Surface& object) {
539 return SurfaceInterval::right_open(object->GetCacheAddr() + 1,
540 object->GetCacheAddr() + object->GetMemorySize() - 1);
541 }
542
543 // Reinterpreted surfaces are very fragil as the game may keep rendering into them.
544 SurfaceIntervalCache reinterpreted_surfaces;
545
546 void RegisterReinterpretSurface(Surface reinterpret_surface) {
547 auto interval = GetReinterpretInterval(reinterpret_surface);
548 reinterpreted_surfaces.insert({interval, reinterpret_surface});
549 reinterpret_surface->MarkReinterpreted();
550 }
551
552 Surface CollideOnReinterpretedSurface(CacheAddr addr) const {
553 const SurfaceInterval interval{addr};
554 for (auto& pair :
555 boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
556 return pair.second;
557 }
558 return nullptr;
559 }
560
561 void Register(const Surface& object) override {
562 RasterizerCache<Surface>::Register(object);
563 }
564
565 /// Unregisters an object from the cache
566 void Unregister(const Surface& object) override {
567 if (object->IsReinterpreted()) {
568 auto interval = GetReinterpretInterval(object);
569 reinterpreted_surfaces.erase(interval);
570 }
571 RasterizerCache<Surface>::Unregister(object);
572 }
573};
574
575} // namespace OpenGL