diff options
| author | 2018-04-24 23:22:24 -0400 | |
|---|---|---|
| committer | 2018-04-24 23:22:24 -0400 | |
| commit | ea3151f475e170eaaec3ded306a0fe5c1e5944db (patch) | |
| tree | 6f7e127c4f58de6071d9a7dbd2af464dbbd14b9b /src/core/memory.cpp | |
| parent | Merge pull request #393 from lioncash/loader (diff) | |
| parent | renderer_opengl: Use correct byte order for framebuffer pixel format ABGR8. (diff) | |
| download | yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar.gz yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.tar.xz yuzu-ea3151f475e170eaaec3ded306a0fe5c1e5944db.zip | |
Merge pull request #388 from bunnei/refactor-rasterizer-cache
Refactor rasterizer cache
Diffstat (limited to 'src/core/memory.cpp')
| -rw-r--r-- | src/core/memory.cpp | 64 |
1 files changed, 48 insertions, 16 deletions
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 291bf066f..ff0420c56 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
| @@ -325,15 +325,29 @@ u8* GetPhysicalPointer(PAddr address) { | |||
| 325 | return target_pointer; | 325 | return target_pointer; |
| 326 | } | 326 | } |
| 327 | 327 | ||
| 328 | void RasterizerMarkRegionCached(VAddr start, u64 size, bool cached) { | 328 | void RasterizerMarkRegionCached(Tegra::GPUVAddr gpu_addr, u64 size, bool cached) { |
| 329 | if (start == 0) { | 329 | if (gpu_addr == 0) { |
| 330 | return; | 330 | return; |
| 331 | } | 331 | } |
| 332 | 332 | ||
| 333 | u64 num_pages = ((start + size - 1) >> PAGE_BITS) - (start >> PAGE_BITS) + 1; | 333 | // Iterate over a contiguous CPU address space, which corresponds to the specified GPU address |
| 334 | VAddr vaddr = start; | 334 | // space, marking the region as un/cached. The region is marked un/cached at a granularity of |
| 335 | // CPU pages, hence why we iterate on a CPU page basis (note: GPU page size is different). This | ||
| 336 | // assumes the specified GPU address region is contiguous as well. | ||
| 337 | |||
| 338 | u64 num_pages = ((gpu_addr + size - 1) >> PAGE_BITS) - (gpu_addr >> PAGE_BITS) + 1; | ||
| 339 | for (unsigned i = 0; i < num_pages; ++i, gpu_addr += PAGE_SIZE) { | ||
| 340 | boost::optional<VAddr> maybe_vaddr = | ||
| 341 | Core::System::GetInstance().GPU().memory_manager->GpuToCpuAddress(gpu_addr); | ||
| 342 | // The GPU <-> CPU virtual memory mapping is not 1:1 | ||
| 343 | if (!maybe_vaddr) { | ||
| 344 | LOG_ERROR(HW_Memory, | ||
| 345 | "Trying to flush a cached region to an invalid physical address %08X", | ||
| 346 | gpu_addr); | ||
| 347 | continue; | ||
| 348 | } | ||
| 349 | VAddr vaddr = *maybe_vaddr; | ||
| 335 | 350 | ||
| 336 | for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) { | ||
| 337 | PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; | 351 | PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; |
| 338 | 352 | ||
| 339 | if (cached) { | 353 | if (cached) { |
| @@ -347,6 +361,10 @@ void RasterizerMarkRegionCached(VAddr start, u64 size, bool cached) { | |||
| 347 | page_type = PageType::RasterizerCachedMemory; | 361 | page_type = PageType::RasterizerCachedMemory; |
| 348 | current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr; | 362 | current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr; |
| 349 | break; | 363 | break; |
| 364 | case PageType::RasterizerCachedMemory: | ||
| 365 | // There can be more than one GPU region mapped per CPU region, so it's common that | ||
| 366 | // this area is already marked as cached. | ||
| 367 | break; | ||
| 350 | default: | 368 | default: |
| 351 | UNREACHABLE(); | 369 | UNREACHABLE(); |
| 352 | } | 370 | } |
| @@ -357,6 +375,10 @@ void RasterizerMarkRegionCached(VAddr start, u64 size, bool cached) { | |||
| 357 | // It is not necessary for a process to have this region mapped into its address | 375 | // It is not necessary for a process to have this region mapped into its address |
| 358 | // space, for example, a system module need not have a VRAM mapping. | 376 | // space, for example, a system module need not have a VRAM mapping. |
| 359 | break; | 377 | break; |
| 378 | case PageType::Memory: | ||
| 379 | // There can be more than one GPU region mapped per CPU region, so it's common that | ||
| 380 | // this area is already unmarked as cached. | ||
| 381 | break; | ||
| 360 | case PageType::RasterizerCachedMemory: { | 382 | case PageType::RasterizerCachedMemory: { |
| 361 | u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK); | 383 | u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK); |
| 362 | if (pointer == nullptr) { | 384 | if (pointer == nullptr) { |
| @@ -394,19 +416,29 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) { | |||
| 394 | 416 | ||
| 395 | VAddr overlap_start = std::max(start, region_start); | 417 | VAddr overlap_start = std::max(start, region_start); |
| 396 | VAddr overlap_end = std::min(end, region_end); | 418 | VAddr overlap_end = std::min(end, region_end); |
| 419 | |||
| 420 | std::vector<Tegra::GPUVAddr> gpu_addresses = | ||
| 421 | Core::System::GetInstance().GPU().memory_manager->CpuToGpuAddress(overlap_start); | ||
| 422 | |||
| 423 | if (gpu_addresses.empty()) { | ||
| 424 | return; | ||
| 425 | } | ||
| 426 | |||
| 397 | u64 overlap_size = overlap_end - overlap_start; | 427 | u64 overlap_size = overlap_end - overlap_start; |
| 398 | 428 | ||
| 399 | auto* rasterizer = VideoCore::g_renderer->Rasterizer(); | 429 | for (const auto& gpu_address : gpu_addresses) { |
| 400 | switch (mode) { | 430 | auto* rasterizer = VideoCore::g_renderer->Rasterizer(); |
| 401 | case FlushMode::Flush: | 431 | switch (mode) { |
| 402 | rasterizer->FlushRegion(overlap_start, overlap_size); | 432 | case FlushMode::Flush: |
| 403 | break; | 433 | rasterizer->FlushRegion(gpu_address, overlap_size); |
| 404 | case FlushMode::Invalidate: | 434 | break; |
| 405 | rasterizer->InvalidateRegion(overlap_start, overlap_size); | 435 | case FlushMode::Invalidate: |
| 406 | break; | 436 | rasterizer->InvalidateRegion(gpu_address, overlap_size); |
| 407 | case FlushMode::FlushAndInvalidate: | 437 | break; |
| 408 | rasterizer->FlushAndInvalidateRegion(overlap_start, overlap_size); | 438 | case FlushMode::FlushAndInvalidate: |
| 409 | break; | 439 | rasterizer->FlushAndInvalidateRegion(gpu_address, overlap_size); |
| 440 | break; | ||
| 441 | } | ||
| 410 | } | 442 | } |
| 411 | }; | 443 | }; |
| 412 | 444 | ||