author    bunnei  2018-06-26 16:14:14 -0400
committer bunnei  2018-06-27 00:15:44 -0400
commit    1dd754590fb9850bf00ddacbb860076dbbacabc6 (patch)
tree      a628bb47bb9f3308c281b608ee6c347883553bf6
parent    gl_rasterizer_cache: Various fixes for ASTC handling. (diff)
gl_rasterizer_cache: Implement caching for texture and framebuffer surfaces.
gl_rasterizer_cache: Improved cache management based on Citra's implementation.
gl_surface_cache: Add some docstrings.
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp         25
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer_cache.cpp  116
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer_cache.h     43
3 files changed, 168 insertions, 16 deletions
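
The heart of the change is a keyed surface cache: a surface's parameters are hashed into a SurfaceKey, looked up in an unordered_map, created, registered, and loaded from guest memory on a miss, and reloaded on every hit when the use_accurate_framebuffers setting is enabled. Below is a minimal, self-contained sketch of that flow; the SurfaceParams fields, the hash, and the LoadSurface stub are simplified stand-ins for illustration, not the yuzu API.

#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <unordered_map>

// Stand-in for SurfaceParams: only the fields needed to key the cache.
struct SurfaceParams {
    std::uint64_t addr;
    std::uint32_t width;
    std::uint32_t height;

    bool operator==(const SurfaceParams& other) const {
        return addr == other.addr && width == other.width && height == other.height;
    }
};

// Stand-in for the SurfaceKey hash; the real cache hashes the whole struct.
struct SurfaceParamsHash {
    std::size_t operator()(const SurfaceParams& params) const {
        return std::hash<std::uint64_t>{}(params.addr) ^ (params.width * 31u) ^
               (params.height * 131u);
    }
};

struct CachedSurface {
    explicit CachedSurface(const SurfaceParams& params) : params(params) {}
    SurfaceParams params;
};
using Surface = std::shared_ptr<CachedSurface>;

class SurfaceCache {
public:
    explicit SurfaceCache(bool use_accurate_framebuffers)
        : use_accurate_framebuffers(use_accurate_framebuffers) {}

    Surface GetSurface(const SurfaceParams& params) {
        const auto it = cache.find(params);
        if (it != cache.end()) {
            Surface surface = it->second;
            if (use_accurate_framebuffers) {
                LoadSurface(surface); // Re-read guest memory on every hit, trading speed for accuracy
            }
            return surface;
        }
        // Miss: create, register, and do the initial load from guest memory.
        Surface surface = std::make_shared<CachedSurface>(params);
        cache.emplace(params, surface);
        LoadSurface(surface);
        return surface;
    }

private:
    void LoadSurface(const Surface& /*surface*/) {
        // Placeholder for "upload texels from guest memory into the GL texture".
    }

    bool use_accurate_framebuffers;
    std::unordered_map<SurfaceParams, Surface, SurfaceParamsHash> cache;
};

int main() {
    SurfaceCache cache(/*use_accurate_framebuffers=*/false);
    const Surface a = cache.GetSurface({0x1000, 256, 256});
    const Surface b = cache.GetSurface({0x1000, 256, 256});
    return a == b ? 0 : 1; // Identical params hit the same cached surface
}

The real cache keys on the full SurfaceParams struct via Common::HashableStruct and additionally tracks which guest memory pages each surface covers, as the diffs below show.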
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f9b0ce434..62ee45a36 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -435,22 +435,35 @@ void RasterizerOpenGL::DrawArrays() {
 
     // Mark framebuffer surfaces as dirty
     if (color_surface != nullptr && write_color_fb) {
-        res_cache.FlushSurface(color_surface);
+        res_cache.MarkSurfaceAsDirty(color_surface);
     }
     if (depth_surface != nullptr && write_depth_fb) {
-        res_cache.FlushSurface(depth_surface);
+        res_cache.MarkSurfaceAsDirty(depth_surface);
     }
 }
 
 void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
 
-void RasterizerOpenGL::FlushAll() {}
+void RasterizerOpenGL::FlushAll() {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS);
+}
 
-void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {}
+void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.FlushRegion(addr, size);
+}
 
-void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {}
+void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.InvalidateRegion(addr, size);
+}
 
-void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {}
+void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    res_cache.FlushRegion(addr, size);
+    res_cache.InvalidateRegion(addr, size);
+}
 
 bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) {
     MICROPROFILE_SCOPE(OpenGL_Blits);
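
These rasterizer hooks wire the conventional flush/invalidate split into the new cache (RasterizerCacheOpenGL::FlushRegion itself is still a stub in this commit): flushing is meant to write dirty GPU-side data back to guest memory, invalidating drops cached copies so the next access reloads them, and FlushAndInvalidateRegion does both in that order. A toy, self-contained illustration of those semantics against a plain byte buffer follows; ToyCache and its members are invented for this sketch and are not part of yuzu.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <map>
#include <vector>

// Toy "guest memory" plus a cache of dirty GPU-side writes keyed by address.
struct ToyCache {
    std::vector<std::uint8_t> guest_memory = std::vector<std::uint8_t>(0x100, 0);
    std::map<std::size_t, std::uint8_t> dirty; // addr -> value known only to the cache

    // Flush: write any dirty cached data in [addr, addr + size) back to guest memory.
    void FlushRegion(std::size_t addr, std::size_t size) {
        for (auto it = dirty.lower_bound(addr); it != dirty.end() && it->first < addr + size;
             ++it) {
            guest_memory[it->first] = it->second;
        }
    }

    // Invalidate: forget cached copies in the region so they are reloaded on next use.
    void InvalidateRegion(std::size_t addr, std::size_t size) {
        dirty.erase(dirty.lower_bound(addr), dirty.lower_bound(addr + size));
    }

    void FlushAndInvalidateRegion(std::size_t addr, std::size_t size) {
        FlushRegion(addr, size);
        InvalidateRegion(addr, size);
    }
};

int main() {
    ToyCache cache;
    cache.dirty[0x10] = 42;                     // a "GPU" write that guest memory has not seen yet
    cache.FlushAndInvalidateRegion(0x00, 0x20); // write it back, then drop the cached copy
    std::printf("%d %zu\n", cache.guest_memory[0x10], cache.dirty.size()); // prints: 42 0
    return 0;
}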
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index bd35bdb02..71ad7be74 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -12,6 +12,7 @@
 #include "core/core.h"
 #include "core/hle/kernel/process.h"
 #include "core/memory.h"
+#include "core/settings.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
 #include "video_core/textures/astc.h"
@@ -215,7 +216,7 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup
     cur_state.Apply();
 }
 
-CachedSurface::CachedSurface(const SurfaceParams& params) : params(params), gl_buffer_size(0) {
+CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) {
     texture.Create();
     const auto& rect{params.GetRect()};
     AllocateSurfaceTexture(texture.handle,
@@ -370,6 +371,12 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
     draw_framebuffer.Create();
 }
 
+RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
+    while (!surface_cache.empty()) {
+        UnregisterSurface(surface_cache.begin()->second);
+    }
+}
+
 Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) {
     return GetSurface(SurfaceParams::CreateForTexture(config));
 }
@@ -425,9 +432,17 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
     surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
 }
 
-void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) {
-    surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
-    surface->FlushGLBuffer();
+void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) {
+    if (Settings::values.use_accurate_framebuffers) {
+        // If enabled, always flush dirty surfaces
+        surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
+        surface->FlushGLBuffer();
+    } else {
+        // Otherwise, don't mark surfaces that we write to as cached, because the resulting loads
+        // and flushes are very slow and do not seem to improve accuracy
+        const auto& params{surface->GetSurfaceParams()};
+        Memory::RasterizerMarkRegionCached(params.addr, params.size_in_bytes, false);
+    }
 }
 
 Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
@@ -441,13 +456,16 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) {
     Surface surface;
     if (search != surface_cache.end()) {
         surface = search->second;
+        if (Settings::values.use_accurate_framebuffers) {
+            // Reload the surface from Switch memory
+            LoadSurface(surface);
+        }
     } else {
         surface = std::make_shared<CachedSurface>(params);
-        surface_cache[surface_key] = surface;
+        RegisterSurface(surface);
+        LoadSurface(surface);
     }
 
-    LoadSurface(surface);
-
     return surface;
 }
 
@@ -476,3 +494,87 @@ Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const {
 
     return surfaces[0];
 }
+
+void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size*/) {
+    // TODO(bunnei): This is unused in the current implementation of the rasterizer cache. We should
+    // probably implement this in the future, but for now, the `use_accurate_framebuffers` setting
+    // can be used to always flush.
+}
+
+void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) {
+    for (const auto& pair : surface_cache) {
+        const auto& surface{pair.second};
+        const auto& params{surface->GetSurfaceParams()};
+
+        if (params.IsOverlappingRegion(addr, size)) {
+            UnregisterSurface(surface);
+        }
+    }
+}
+
+void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) {
+    const auto& params{surface->GetSurfaceParams()};
+    const auto& surface_key{SurfaceKey::Create(params)};
+    const auto& search{surface_cache.find(surface_key)};
+
+    if (search != surface_cache.end()) {
+        // Registered already
+        return;
+    }
+
+    surface_cache[surface_key] = surface;
+    UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1);
+}
+
+void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
+    const auto& params{surface->GetSurfaceParams()};
+    const auto& surface_key{SurfaceKey::Create(params)};
+    const auto& search{surface_cache.find(surface_key)};
+
+    if (search == surface_cache.end()) {
+        // Unregistered already
+        return;
+    }
+
+    UpdatePagesCachedCount(params.addr, params.size_in_bytes, -1);
+    surface_cache.erase(search);
+}
+
+template <typename Map, typename Interval>
+constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
+    return boost::make_iterator_range(map.equal_range(interval));
+}
+
+void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
+    const u64 num_pages = ((addr + size - 1) >> Tegra::MemoryManager::PAGE_BITS) -
+                          (addr >> Tegra::MemoryManager::PAGE_BITS) + 1;
+    const u64 page_start = addr >> Tegra::MemoryManager::PAGE_BITS;
+    const u64 page_end = page_start + num_pages;
+
+    // Interval maps will erase segments if count reaches 0, so if delta is negative we have to
+    // subtract after iterating
+    const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end);
+    if (delta > 0)
+        cached_pages.add({pages_interval, delta});
+
+    for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) {
+        const auto interval = pair.first & pages_interval;
+        const int count = pair.second;
+
+        const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval)
+                                                    << Tegra::MemoryManager::PAGE_BITS;
+        const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval)
+                                                  << Tegra::MemoryManager::PAGE_BITS;
+        const u64 interval_size = interval_end_addr - interval_start_addr;
+
+        if (delta > 0 && count == delta)
+            Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true);
+        else if (delta < 0 && count == -delta)
+            Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false);
+        else
+            ASSERT(count >= 0);
+    }
+
+    if (delta < 0)
+        cached_pages.add({pages_interval, delta});
+}
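
UpdatePagesCachedCount relies on boost::icl::interval_map's aggregating insert: adding {interval, +1} for each registered surface splits the map into segments whose value is the number of surfaces touching those pages, and a segment is erased automatically once its count returns to zero, which is why a negative delta is only applied after the loop. Below is a small standalone demonstration of that behavior, assuming Boost.ICL is available; the page numbers are arbitrary.

#include <cstdint>
#include <iostream>

#include <boost/icl/interval_map.hpp>

using PageMap = boost::icl::interval_map<std::uint64_t, int>;

static void Dump(const PageMap& cached_pages) {
    for (const auto& [interval, count] : cached_pages) {
        std::cout << "[" << boost::icl::first(interval) << ", "
                  << boost::icl::last_next(interval) << ") -> " << count << '\n';
    }
}

int main() {
    PageMap cached_pages;

    // Two overlapping surfaces: one covering pages [0, 8), another covering [4, 12).
    cached_pages.add({PageMap::interval_type::right_open(0, 8), 1});
    cached_pages.add({PageMap::interval_type::right_open(4, 12), 1});

    // Three segments now exist: [0,4)->1, [4,8)->2, [8,12)->1.
    Dump(cached_pages);

    // Unregister the first surface: the [0,4) segment drops to zero and is erased,
    // and [4,8)->1 joins with [8,12)->1 into a single [4,12)->1 segment.
    cached_pages.add({PageMap::interval_type::right_open(0, 8), -1});
    Dump(cached_pages);
    return 0;
}

Because the default interval_map drops segments whose value returns to the neutral element (zero here), the real code inspects the counts before applying the -1, which is how it detects the pages that have just become uncached and unmarks them.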
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 84bdec652..85e7c8888 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -8,7 +8,7 @@
 #include <map>
 #include <memory>
 #include <vector>
-
+#include <boost/icl/interval_map.hpp>
 #include "common/common_types.h"
 #include "common/hash.h"
 #include "common/math_util.h"
@@ -19,6 +19,7 @@
 class CachedSurface;
 using Surface = std::shared_ptr<CachedSurface>;
 using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
+using PageMap = boost::icl::interval_map<u64, int>;
 
 struct SurfaceParams {
     enum class PixelFormat {
@@ -243,8 +244,10 @@ struct SurfaceParams {
         return SurfaceType::Invalid;
     }
 
+    /// Returns the rectangle corresponding to this surface
     MathUtil::Rectangle<u32> GetRect() const;
 
+    /// Returns the size of this surface in bytes, adjusted for compression
     size_t SizeInBytes() const {
         const u32 compression_factor{GetCompressionFactor(pixel_format)};
         ASSERT(width % compression_factor == 0);
@@ -253,10 +256,18 @@ struct SurfaceParams {
                GetFormatBpp(pixel_format) / CHAR_BIT;
     }
 
+    /// Returns the CPU virtual address for this surface
     VAddr GetCpuAddr() const;
 
+    /// Returns true if the specified region overlaps with this surface's region in Switch memory
+    bool IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const {
+        return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes);
+    }
+
+    /// Creates SurfaceParams from a texture configuration
     static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
 
+    /// Creates SurfaceParams from a framebuffer configuration
     static SurfaceParams CreateForFramebuffer(
         const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
 
@@ -272,6 +283,7 @@ struct SurfaceParams {
     size_t size_in_bytes;
 };
 
+/// Hashable variation of SurfaceParams, used for a key in the surface cache
 struct SurfaceKey : Common::HashableStruct<SurfaceParams> {
     static SurfaceKey Create(const SurfaceParams& params) {
         SurfaceKey res;
@@ -325,18 +337,43 @@ private:
 class RasterizerCacheOpenGL final : NonCopyable {
 public:
     RasterizerCacheOpenGL();
+    ~RasterizerCacheOpenGL();
 
+    /// Get a surface based on the texture configuration
     Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
+
+    /// Get the color and depth surfaces based on the framebuffer configuration
     SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
                                                     const MathUtil::Rectangle<s32>& viewport);
-    void LoadSurface(const Surface& surface);
-    void FlushSurface(const Surface& surface);
+
+    /// Marks the specified surface as "dirty", in that it is out of sync with Switch memory
+    void MarkSurfaceAsDirty(const Surface& surface);
+
+    /// Tries to find a framebuffer surface based on the provided CPU address
     Surface TryFindFramebufferSurface(VAddr cpu_addr) const;
 
+    /// Write any cached resources overlapping the region back to memory (if dirty)
+    void FlushRegion(Tegra::GPUVAddr addr, size_t size);
+
+    /// Mark the specified region as being invalidated
+    void InvalidateRegion(Tegra::GPUVAddr addr, size_t size);
+
 private:
+    void LoadSurface(const Surface& surface);
     Surface GetSurface(const SurfaceParams& params);
 
+    /// Register a surface in the cache
+    void RegisterSurface(const Surface& surface);
+
+    /// Remove a surface from the cache
+    void UnregisterSurface(const Surface& surface);
+
+    /// Increase/decrease the number of surfaces in pages touching the specified region
+    void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta);
+
     std::unordered_map<SurfaceKey, Surface> surface_cache;
+    PageMap cached_pages;
+
     OGLFramebuffer read_framebuffer;
     OGLFramebuffer draw_framebuffer;
 };