diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 25 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 116 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 43 |
3 files changed, 168 insertions, 16 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f9b0ce434..62ee45a36 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -435,22 +435,35 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 435 | 435 | ||
| 436 | // Mark framebuffer surfaces as dirty | 436 | // Mark framebuffer surfaces as dirty |
| 437 | if (color_surface != nullptr && write_color_fb) { | 437 | if (color_surface != nullptr && write_color_fb) { |
| 438 | res_cache.FlushSurface(color_surface); | 438 | res_cache.MarkSurfaceAsDirty(color_surface); |
| 439 | } | 439 | } |
| 440 | if (depth_surface != nullptr && write_depth_fb) { | 440 | if (depth_surface != nullptr && write_depth_fb) { |
| 441 | res_cache.FlushSurface(depth_surface); | 441 | res_cache.MarkSurfaceAsDirty(depth_surface); |
| 442 | } | 442 | } |
| 443 | } | 443 | } |
| 444 | 444 | ||
| 445 | void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {} | 445 | void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {} |
| 446 | 446 | ||
| 447 | void RasterizerOpenGL::FlushAll() {} | 447 | void RasterizerOpenGL::FlushAll() { |
| 448 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||
| 449 | res_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); | ||
| 450 | } | ||
| 448 | 451 | ||
| 449 | void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {} | 452 | void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { |
| 453 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||
| 454 | res_cache.FlushRegion(addr, size); | ||
| 455 | } | ||
| 450 | 456 | ||
| 451 | void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {} | 457 | void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { |
| 458 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||
| 459 | res_cache.InvalidateRegion(addr, size); | ||
| 460 | } | ||
| 452 | 461 | ||
| 453 | void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {} | 462 | void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { |
| 463 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||
| 464 | res_cache.FlushRegion(addr, size); | ||
| 465 | res_cache.InvalidateRegion(addr, size); | ||
| 466 | } | ||
| 454 | 467 | ||
| 455 | bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { | 468 | bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { |
| 456 | MICROPROFILE_SCOPE(OpenGL_Blits); | 469 | MICROPROFILE_SCOPE(OpenGL_Blits); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index bd35bdb02..71ad7be74 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "core/core.h" | 12 | #include "core/core.h" |
| 13 | #include "core/hle/kernel/process.h" | 13 | #include "core/hle/kernel/process.h" |
| 14 | #include "core/memory.h" | 14 | #include "core/memory.h" |
| 15 | #include "core/settings.h" | ||
| 15 | #include "video_core/engines/maxwell_3d.h" | 16 | #include "video_core/engines/maxwell_3d.h" |
| 16 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | 17 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" |
| 17 | #include "video_core/textures/astc.h" | 18 | #include "video_core/textures/astc.h" |
| @@ -215,7 +216,7 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup | |||
| 215 | cur_state.Apply(); | 216 | cur_state.Apply(); |
| 216 | } | 217 | } |
| 217 | 218 | ||
| 218 | CachedSurface::CachedSurface(const SurfaceParams& params) : params(params), gl_buffer_size(0) { | 219 | CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) { |
| 219 | texture.Create(); | 220 | texture.Create(); |
| 220 | const auto& rect{params.GetRect()}; | 221 | const auto& rect{params.GetRect()}; |
| 221 | AllocateSurfaceTexture(texture.handle, | 222 | AllocateSurfaceTexture(texture.handle, |
| @@ -370,6 +371,12 @@ RasterizerCacheOpenGL::RasterizerCacheOpenGL() { | |||
| 370 | draw_framebuffer.Create(); | 371 | draw_framebuffer.Create(); |
| 371 | } | 372 | } |
| 372 | 373 | ||
| 374 | RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { | ||
| 375 | while (!surface_cache.empty()) { | ||
| 376 | UnregisterSurface(surface_cache.begin()->second); | ||
| 377 | } | ||
| 378 | } | ||
| 379 | |||
| 373 | Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { | 380 | Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { |
| 374 | return GetSurface(SurfaceParams::CreateForTexture(config)); | 381 | return GetSurface(SurfaceParams::CreateForTexture(config)); |
| 375 | } | 382 | } |
| @@ -425,9 +432,17 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { | |||
| 425 | surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); | 432 | surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); |
| 426 | } | 433 | } |
| 427 | 434 | ||
| 428 | void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) { | 435 | void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) { |
| 429 | surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); | 436 | if (Settings::values.use_accurate_framebuffers) { |
| 430 | surface->FlushGLBuffer(); | 437 | // If enabled, always flush dirty surfaces |
| 438 | surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); | ||
| 439 | surface->FlushGLBuffer(); | ||
| 440 | } else { | ||
| 441 | // Otherwise, don't mark surfaces that we write to as cached, because the resulting loads | ||
| 442 | // and flushes are very slow and do not seem to improve accuracy | ||
| 443 | const auto& params{surface->GetSurfaceParams()}; | ||
| 444 | Memory::RasterizerMarkRegionCached(params.addr, params.size_in_bytes, false); | ||
| 445 | } | ||
| 431 | } | 446 | } |
| 432 | 447 | ||
| 433 | Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { | 448 | Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { |
| @@ -441,13 +456,16 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { | |||
| 441 | Surface surface; | 456 | Surface surface; |
| 442 | if (search != surface_cache.end()) { | 457 | if (search != surface_cache.end()) { |
| 443 | surface = search->second; | 458 | surface = search->second; |
| 459 | if (Settings::values.use_accurate_framebuffers) { | ||
| 460 | // Reload the surface from Switch memory | ||
| 461 | LoadSurface(surface); | ||
| 462 | } | ||
| 444 | } else { | 463 | } else { |
| 445 | surface = std::make_shared<CachedSurface>(params); | 464 | surface = std::make_shared<CachedSurface>(params); |
| 446 | surface_cache[surface_key] = surface; | 465 | RegisterSurface(surface); |
| 466 | LoadSurface(surface); | ||
| 447 | } | 467 | } |
| 448 | 468 | ||
| 449 | LoadSurface(surface); | ||
| 450 | |||
| 451 | return surface; | 469 | return surface; |
| 452 | } | 470 | } |
| 453 | 471 | ||
| @@ -476,3 +494,87 @@ Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const { | |||
| 476 | 494 | ||
| 477 | return surfaces[0]; | 495 | return surfaces[0]; |
| 478 | } | 496 | } |
| 497 | |||
| 498 | void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size*/) { | ||
| 499 | // TODO(bunnei): This is unused in the current implementation of the rasterizer cache. We should | ||
| 500 | // probably implement this in the future, but for now, the `use_accurate_framebuffers` setting | ||
| 501 | // can be used to always flush. | ||
| 502 | } | ||
| 503 | |||
| 504 | void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) { | ||
| 505 | for (const auto& pair : surface_cache) { | ||
| 506 | const auto& surface{pair.second}; | ||
| 507 | const auto& params{surface->GetSurfaceParams()}; | ||
| 508 | |||
| 509 | if (params.IsOverlappingRegion(addr, size)) { | ||
| 510 | UnregisterSurface(surface); | ||
| 511 | } | ||
| 512 | } | ||
| 513 | } | ||
| 514 | |||
| 515 | void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { | ||
| 516 | const auto& params{surface->GetSurfaceParams()}; | ||
| 517 | const auto& surface_key{SurfaceKey::Create(params)}; | ||
| 518 | const auto& search{surface_cache.find(surface_key)}; | ||
| 519 | |||
| 520 | if (search != surface_cache.end()) { | ||
| 521 | // Registered already | ||
| 522 | return; | ||
| 523 | } | ||
| 524 | |||
| 525 | surface_cache[surface_key] = surface; | ||
| 526 | UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1); | ||
| 527 | } | ||
| 528 | |||
| 529 | void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { | ||
| 530 | const auto& params{surface->GetSurfaceParams()}; | ||
| 531 | const auto& surface_key{SurfaceKey::Create(params)}; | ||
| 532 | const auto& search{surface_cache.find(surface_key)}; | ||
| 533 | |||
| 534 | if (search == surface_cache.end()) { | ||
| 535 | // Unregistered already | ||
| 536 | return; | ||
| 537 | } | ||
| 538 | |||
| 539 | UpdatePagesCachedCount(params.addr, params.size_in_bytes, -1); | ||
| 540 | surface_cache.erase(search); | ||
| 541 | } | ||
| 542 | |||
| 543 | template <typename Map, typename Interval> | ||
| 544 | constexpr auto RangeFromInterval(Map& map, const Interval& interval) { | ||
| 545 | return boost::make_iterator_range(map.equal_range(interval)); | ||
| 546 | } | ||
| 547 | |||
| 548 | void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { | ||
| 549 | const u64 num_pages = ((addr + size - 1) >> Tegra::MemoryManager::PAGE_BITS) - | ||
| 550 | (addr >> Tegra::MemoryManager::PAGE_BITS) + 1; | ||
| 551 | const u64 page_start = addr >> Tegra::MemoryManager::PAGE_BITS; | ||
| 552 | const u64 page_end = page_start + num_pages; | ||
| 553 | |||
| 554 | // Interval maps will erase segments if count reaches 0, so if delta is negative we have to | ||
| 555 | // subtract after iterating | ||
| 556 | const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); | ||
| 557 | if (delta > 0) | ||
| 558 | cached_pages.add({pages_interval, delta}); | ||
| 559 | |||
| 560 | for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { | ||
| 561 | const auto interval = pair.first & pages_interval; | ||
| 562 | const int count = pair.second; | ||
| 563 | |||
| 564 | const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval) | ||
| 565 | << Tegra::MemoryManager::PAGE_BITS; | ||
| 566 | const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval) | ||
| 567 | << Tegra::MemoryManager::PAGE_BITS; | ||
| 568 | const u64 interval_size = interval_end_addr - interval_start_addr; | ||
| 569 | |||
| 570 | if (delta > 0 && count == delta) | ||
| 571 | Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); | ||
| 572 | else if (delta < 0 && count == -delta) | ||
| 573 | Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); | ||
| 574 | else | ||
| 575 | ASSERT(count >= 0); | ||
| 576 | } | ||
| 577 | |||
| 578 | if (delta < 0) | ||
| 579 | cached_pages.add({pages_interval, delta}); | ||
| 580 | } | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 84bdec652..85e7c8888 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | #include <map> | 8 | #include <map> |
| 9 | #include <memory> | 9 | #include <memory> |
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | 11 | #include <boost/icl/interval_map.hpp> | |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "common/hash.h" | 13 | #include "common/hash.h" |
| 14 | #include "common/math_util.h" | 14 | #include "common/math_util.h" |
| @@ -19,6 +19,7 @@ | |||
| 19 | class CachedSurface; | 19 | class CachedSurface; |
| 20 | using Surface = std::shared_ptr<CachedSurface>; | 20 | using Surface = std::shared_ptr<CachedSurface>; |
| 21 | using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; | 21 | using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; |
| 22 | using PageMap = boost::icl::interval_map<u64, int>; | ||
| 22 | 23 | ||
| 23 | struct SurfaceParams { | 24 | struct SurfaceParams { |
| 24 | enum class PixelFormat { | 25 | enum class PixelFormat { |
| @@ -243,8 +244,10 @@ struct SurfaceParams { | |||
| 243 | return SurfaceType::Invalid; | 244 | return SurfaceType::Invalid; |
| 244 | } | 245 | } |
| 245 | 246 | ||
| 247 | /// Returns the rectangle corresponding to this surface | ||
| 246 | MathUtil::Rectangle<u32> GetRect() const; | 248 | MathUtil::Rectangle<u32> GetRect() const; |
| 247 | 249 | ||
| 250 | /// Returns the size of this surface in bytes, adjusted for compression | ||
| 248 | size_t SizeInBytes() const { | 251 | size_t SizeInBytes() const { |
| 249 | const u32 compression_factor{GetCompressionFactor(pixel_format)}; | 252 | const u32 compression_factor{GetCompressionFactor(pixel_format)}; |
| 250 | ASSERT(width % compression_factor == 0); | 253 | ASSERT(width % compression_factor == 0); |
| @@ -253,10 +256,18 @@ struct SurfaceParams { | |||
| 253 | GetFormatBpp(pixel_format) / CHAR_BIT; | 256 | GetFormatBpp(pixel_format) / CHAR_BIT; |
| 254 | } | 257 | } |
| 255 | 258 | ||
| 259 | /// Returns the CPU virtual address for this surface | ||
| 256 | VAddr GetCpuAddr() const; | 260 | VAddr GetCpuAddr() const; |
| 257 | 261 | ||
| 262 | /// Returns true if the specified region overlaps with this surface's region in Switch memory | ||
| 263 | bool IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const { | ||
| 264 | return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes); | ||
| 265 | } | ||
| 266 | |||
| 267 | /// Creates SurfaceParams from a texture configuration | ||
| 258 | static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); | 268 | static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); |
| 259 | 269 | ||
| 270 | /// Creates SurfaceParams from a framebuffer configuration | ||
| 260 | static SurfaceParams CreateForFramebuffer( | 271 | static SurfaceParams CreateForFramebuffer( |
| 261 | const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config); | 272 | const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config); |
| 262 | 273 | ||
| @@ -272,6 +283,7 @@ struct SurfaceParams { | |||
| 272 | size_t size_in_bytes; | 283 | size_t size_in_bytes; |
| 273 | }; | 284 | }; |
| 274 | 285 | ||
| 286 | /// Hashable variation of SurfaceParams, used for a key in the surface cache | ||
| 275 | struct SurfaceKey : Common::HashableStruct<SurfaceParams> { | 287 | struct SurfaceKey : Common::HashableStruct<SurfaceParams> { |
| 276 | static SurfaceKey Create(const SurfaceParams& params) { | 288 | static SurfaceKey Create(const SurfaceParams& params) { |
| 277 | SurfaceKey res; | 289 | SurfaceKey res; |
| @@ -325,18 +337,43 @@ private: | |||
| 325 | class RasterizerCacheOpenGL final : NonCopyable { | 337 | class RasterizerCacheOpenGL final : NonCopyable { |
| 326 | public: | 338 | public: |
| 327 | RasterizerCacheOpenGL(); | 339 | RasterizerCacheOpenGL(); |
| 340 | ~RasterizerCacheOpenGL(); | ||
| 328 | 341 | ||
| 342 | /// Get a surface based on the texture configuration | ||
| 329 | Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); | 343 | Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); |
| 344 | |||
| 345 | /// Get the color and depth surfaces based on the framebuffer configuration | ||
| 330 | SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, | 346 | SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, |
| 331 | const MathUtil::Rectangle<s32>& viewport); | 347 | const MathUtil::Rectangle<s32>& viewport); |
| 332 | void LoadSurface(const Surface& surface); | 348 | |
| 333 | void FlushSurface(const Surface& surface); | 349 | /// Marks the specified surface as "dirty", in that it is out of sync with Switch memory |
| 350 | void MarkSurfaceAsDirty(const Surface& surface); | ||
| 351 | |||
| 352 | /// Tries to find a framebuffer surface based on the provided CPU address | ||
| 334 | Surface TryFindFramebufferSurface(VAddr cpu_addr) const; | 353 | Surface TryFindFramebufferSurface(VAddr cpu_addr) const; |
| 335 | 354 | ||
| 355 | /// Write any cached resources overlapping the region back to memory (if dirty) | ||
| 356 | void FlushRegion(Tegra::GPUVAddr addr, size_t size); | ||
| 357 | |||
| 358 | /// Mark the specified region as being invalidated | ||
| 359 | void InvalidateRegion(Tegra::GPUVAddr addr, size_t size); | ||
| 360 | |||
| 336 | private: | 361 | private: |
| 362 | void LoadSurface(const Surface& surface); | ||
| 337 | Surface GetSurface(const SurfaceParams& params); | 363 | Surface GetSurface(const SurfaceParams& params); |
| 338 | 364 | ||
| 365 | /// Register surface into the cache | ||
| 366 | void RegisterSurface(const Surface& surface); | ||
| 367 | |||
| 368 | /// Remove surface from the cache | ||
| 369 | void UnregisterSurface(const Surface& surface); | ||
| 370 | |||
| 371 | /// Increase/decrease the number of surfaces in pages touching the specified region | ||
| 372 | void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); | ||
| 373 | |||
| 339 | std::unordered_map<SurfaceKey, Surface> surface_cache; | 374 | std::unordered_map<SurfaceKey, Surface> surface_cache; |
| 375 | PageMap cached_pages; | ||
| 376 | |||
| 340 | OGLFramebuffer read_framebuffer; | 377 | OGLFramebuffer read_framebuffer; |
| 341 | OGLFramebuffer draw_framebuffer; | 378 | OGLFramebuffer draw_framebuffer; |
| 342 | }; | 379 | }; |