diff options
| author | 2018-08-27 21:35:15 -0400 | |
|---|---|---|
| committer | 2018-08-31 13:07:27 -0400 | |
| commit | d647d9550c838fdfb16ad053f3ff243de3e15482 (patch) | |
| tree | 8bde428c4c670d134dd13b849044e4369566d8f9 /src | |
| parent | gl_rasterizer: Fix issues with the rasterizer cache. (diff) | |
| download | yuzu-d647d9550c838fdfb16ad053f3ff243de3e15482.tar.gz yuzu-d647d9550c838fdfb16ad053f3ff243de3e15482.tar.xz yuzu-d647d9550c838fdfb16ad053f3ff243de3e15482.zip | |
gl_renderer: Cache textures, framebuffers, and shaders based on CPU address.
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_gpu.h | 1 | ||||
| -rw-r--r-- | src/core/memory.cpp | 51 | ||||
| -rw-r--r-- | src/core/memory.h | 3 | ||||
| -rw-r--r-- | src/video_core/rasterizer_cache.h | 8 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 19 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 78 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 11 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 18 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 7 |
11 files changed, 70 insertions, 138 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index 650ed8fbc..03b7356d0 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "common/swap.h" | 11 | #include "common/swap.h" |
| 12 | #include "core/hle/service/nvdrv/devices/nvdevice.h" | 12 | #include "core/hle/service/nvdrv/devices/nvdevice.h" |
| 13 | #include "video_core/memory_manager.h" | ||
| 13 | 14 | ||
| 14 | namespace Service::Nvidia::Devices { | 15 | namespace Service::Nvidia::Devices { |
| 15 | 16 | ||
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index bc34bfd6d..0e4e0157c 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -251,8 +251,8 @@ std::string ReadCString(VAddr vaddr, std::size_t max_length) { | |||
| 251 | return string; | 251 | return string; |
| 252 | } | 252 | } |
| 253 | 253 | ||
| 254 | void RasterizerMarkRegionCached(Tegra::GPUVAddr gpu_addr, u64 size, bool cached) { | 254 | void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) { |
| 255 | if (gpu_addr == 0) { | 255 | if (vaddr == 0) { |
| 256 | return; | 256 | return; |
| 257 | } | 257 | } |
| 258 | 258 | ||
| @@ -261,19 +261,8 @@ void RasterizerMarkRegionCached(Tegra::GPUVAddr gpu_addr, u64 size, bool cached) | |||
| 261 | // CPU pages, hence why we iterate on a CPU page basis (note: GPU page size is different). This | 261 | // CPU pages, hence why we iterate on a CPU page basis (note: GPU page size is different). This |
| 262 | // assumes the specified GPU address region is contiguous as well. | 262 | // assumes the specified GPU address region is contiguous as well. |
| 263 | 263 | ||
| 264 | u64 num_pages = ((gpu_addr + size - 1) >> PAGE_BITS) - (gpu_addr >> PAGE_BITS) + 1; | 264 | u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1; |
| 265 | for (unsigned i = 0; i < num_pages; ++i, gpu_addr += PAGE_SIZE) { | 265 | for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) { |
| 266 | boost::optional<VAddr> maybe_vaddr = | ||
| 267 | Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(gpu_addr); | ||
| 268 | // The GPU <-> CPU virtual memory mapping is not 1:1 | ||
| 269 | if (!maybe_vaddr) { | ||
| 270 | LOG_ERROR(HW_Memory, | ||
| 271 | "Trying to flush a cached region to an invalid physical address {:016X}", | ||
| 272 | gpu_addr); | ||
| 273 | continue; | ||
| 274 | } | ||
| 275 | VAddr vaddr = *maybe_vaddr; | ||
| 276 | |||
| 277 | PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; | 266 | PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; |
| 278 | 267 | ||
| 279 | if (cached) { | 268 | if (cached) { |
| @@ -344,29 +333,19 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) { | |||
| 344 | 333 | ||
| 345 | const VAddr overlap_start = std::max(start, region_start); | 334 | const VAddr overlap_start = std::max(start, region_start); |
| 346 | const VAddr overlap_end = std::min(end, region_end); | 335 | const VAddr overlap_end = std::min(end, region_end); |
| 347 | |||
| 348 | const std::vector<Tegra::GPUVAddr> gpu_addresses = | ||
| 349 | system_instance.GPU().MemoryManager().CpuToGpuAddress(overlap_start); | ||
| 350 | |||
| 351 | if (gpu_addresses.empty()) { | ||
| 352 | return; | ||
| 353 | } | ||
| 354 | |||
| 355 | const u64 overlap_size = overlap_end - overlap_start; | 336 | const u64 overlap_size = overlap_end - overlap_start; |
| 356 | 337 | ||
| 357 | for (const auto& gpu_address : gpu_addresses) { | 338 | auto& rasterizer = system_instance.Renderer().Rasterizer(); |
| 358 | auto& rasterizer = system_instance.Renderer().Rasterizer(); | 339 | switch (mode) { |
| 359 | switch (mode) { | 340 | case FlushMode::Flush: |
| 360 | case FlushMode::Flush: | 341 | rasterizer.FlushRegion(overlap_start, overlap_size); |
| 361 | rasterizer.FlushRegion(gpu_address, overlap_size); | 342 | break; |
| 362 | break; | 343 | case FlushMode::Invalidate: |
| 363 | case FlushMode::Invalidate: | 344 | rasterizer.InvalidateRegion(overlap_start, overlap_size); |
| 364 | rasterizer.InvalidateRegion(gpu_address, overlap_size); | 345 | break; |
| 365 | break; | 346 | case FlushMode::FlushAndInvalidate: |
| 366 | case FlushMode::FlushAndInvalidate: | 347 | rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size); |
| 367 | rasterizer.FlushAndInvalidateRegion(gpu_address, overlap_size); | 348 | break; |
| 368 | break; | ||
| 369 | } | ||
| 370 | } | 349 | } |
| 371 | }; | 350 | }; |
| 372 | 351 | ||
diff --git a/src/core/memory.h b/src/core/memory.h index b7fb3b9ed..f06e04a75 100644 --- a/src/core/memory.h +++ b/src/core/memory.h | |||
| @@ -11,7 +11,6 @@ | |||
| 11 | #include <boost/icl/interval_map.hpp> | 11 | #include <boost/icl/interval_map.hpp> |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "core/memory_hook.h" | 13 | #include "core/memory_hook.h" |
| 14 | #include "video_core/memory_manager.h" | ||
| 15 | 14 | ||
| 16 | namespace Kernel { | 15 | namespace Kernel { |
| 17 | class Process; | 16 | class Process; |
| @@ -179,7 +178,7 @@ enum class FlushMode { | |||
| 179 | /** | 178 | /** |
| 180 | * Mark each page touching the region as cached. | 179 | * Mark each page touching the region as cached. |
| 181 | */ | 180 | */ |
| 182 | void RasterizerMarkRegionCached(Tegra::GPUVAddr gpu_addr, u64 size, bool cached); | 181 | void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached); |
| 183 | 182 | ||
| 184 | /** | 183 | /** |
| 185 | * Flushes and invalidates any externally cached rasterizer resources touching the given virtual | 184 | * Flushes and invalidates any externally cached rasterizer resources touching the given virtual |
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index 51245f502..599d39f62 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h | |||
| @@ -17,7 +17,7 @@ template <class T> | |||
| 17 | class RasterizerCache : NonCopyable { | 17 | class RasterizerCache : NonCopyable { |
| 18 | public: | 18 | public: |
| 19 | /// Mark the specified region as being invalidated | 19 | /// Mark the specified region as being invalidated |
| 20 | void InvalidateRegion(Tegra::GPUVAddr region_addr, size_t region_size) { | 20 | void InvalidateRegion(VAddr region_addr, size_t region_size) { |
| 21 | for (auto iter = cached_objects.cbegin(); iter != cached_objects.cend();) { | 21 | for (auto iter = cached_objects.cbegin(); iter != cached_objects.cend();) { |
| 22 | const auto& object{iter->second}; | 22 | const auto& object{iter->second}; |
| 23 | 23 | ||
| @@ -33,7 +33,7 @@ public: | |||
| 33 | 33 | ||
| 34 | protected: | 34 | protected: |
| 35 | /// Tries to get an object from the cache with the specified address | 35 | /// Tries to get an object from the cache with the specified address |
| 36 | T TryGet(Tegra::GPUVAddr addr) const { | 36 | T TryGet(VAddr addr) const { |
| 37 | const auto& search{cached_objects.find(addr)}; | 37 | const auto& search{cached_objects.find(addr)}; |
| 38 | if (search != cached_objects.end()) { | 38 | if (search != cached_objects.end()) { |
| 39 | return search->second; | 39 | return search->second; |
| @@ -43,7 +43,7 @@ protected: | |||
| 43 | } | 43 | } |
| 44 | 44 | ||
| 45 | /// Gets a reference to the cache | 45 | /// Gets a reference to the cache |
| 46 | const std::unordered_map<Tegra::GPUVAddr, T>& GetCache() const { | 46 | const std::unordered_map<VAddr, T>& GetCache() const { |
| 47 | return cached_objects; | 47 | return cached_objects; |
| 48 | } | 48 | } |
| 49 | 49 | ||
| @@ -74,5 +74,5 @@ protected: | |||
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | private: | 76 | private: |
| 77 | std::unordered_map<Tegra::GPUVAddr, T> cached_objects; | 77 | std::unordered_map<VAddr, T> cached_objects; |
| 78 | }; | 78 | }; |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 6b8b64709..9d78e8b6b 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -27,14 +27,14 @@ public: | |||
| 27 | virtual void FlushAll() = 0; | 27 | virtual void FlushAll() = 0; |
| 28 | 28 | ||
| 29 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 29 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 30 | virtual void FlushRegion(Tegra::GPUVAddr addr, u64 size) = 0; | 30 | virtual void FlushRegion(VAddr addr, u64 size) = 0; |
| 31 | 31 | ||
| 32 | /// Notify rasterizer that any caches of the specified region should be invalidated | 32 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 33 | virtual void InvalidateRegion(Tegra::GPUVAddr addr, u64 size) = 0; | 33 | virtual void InvalidateRegion(VAddr addr, u64 size) = 0; |
| 34 | 34 | ||
| 35 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 35 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 36 | /// and invalidated | 36 | /// and invalidated |
| 37 | virtual void FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) = 0; | 37 | virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; |
| 38 | 38 | ||
| 39 | /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0 | 39 | /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0 |
| 40 | virtual bool AccelerateDisplayTransfer(const void* config) { | 40 | virtual bool AccelerateDisplayTransfer(const void* config) { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e70e22ba2..cab6cf53e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -279,10 +279,9 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { | |||
| 279 | return boost::make_iterator_range(map.equal_range(interval)); | 279 | return boost::make_iterator_range(map.equal_range(interval)); |
| 280 | } | 280 | } |
| 281 | 281 | ||
| 282 | void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { | 282 | void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { |
| 283 | const u64 page_start{addr >> Tegra::MemoryManager::PAGE_BITS}; | 283 | const u64 page_start{addr >> Memory::PAGE_BITS}; |
| 284 | const u64 page_end{(addr + size + Tegra::MemoryManager::PAGE_SIZE - 1) >> | 284 | const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; |
| 285 | Tegra::MemoryManager::PAGE_BITS}; | ||
| 286 | 285 | ||
| 287 | // Interval maps will erase segments if count reaches 0, so if delta is negative we have to | 286 | // Interval maps will erase segments if count reaches 0, so if delta is negative we have to |
| 288 | // subtract after iterating | 287 | // subtract after iterating |
| @@ -294,10 +293,8 @@ void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, in | |||
| 294 | const auto interval = pair.first & pages_interval; | 293 | const auto interval = pair.first & pages_interval; |
| 295 | const int count = pair.second; | 294 | const int count = pair.second; |
| 296 | 295 | ||
| 297 | const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval) | 296 | const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; |
| 298 | << Tegra::MemoryManager::PAGE_BITS; | 297 | const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; |
| 299 | const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval) | ||
| 300 | << Tegra::MemoryManager::PAGE_BITS; | ||
| 301 | const u64 interval_size = interval_end_addr - interval_start_addr; | 298 | const u64 interval_size = interval_end_addr - interval_start_addr; |
| 302 | 299 | ||
| 303 | if (delta > 0 && count == delta) | 300 | if (delta > 0 && count == delta) |
| @@ -578,17 +575,17 @@ void RasterizerOpenGL::FlushAll() { | |||
| 578 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 575 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 579 | } | 576 | } |
| 580 | 577 | ||
| 581 | void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { | 578 | void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { |
| 582 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 579 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 583 | } | 580 | } |
| 584 | 581 | ||
| 585 | void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { | 582 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { |
| 586 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 583 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 587 | res_cache.InvalidateRegion(addr, size); | 584 | res_cache.InvalidateRegion(addr, size); |
| 588 | shader_cache.InvalidateRegion(addr, size); | 585 | shader_cache.InvalidateRegion(addr, size); |
| 589 | } | 586 | } |
| 590 | 587 | ||
| 591 | void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { | 588 | void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 592 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 589 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 593 | InvalidateRegion(addr, size); | 590 | InvalidateRegion(addr, size); |
| 594 | } | 591 | } |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 57837bef6..30045ebff 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -44,9 +44,9 @@ public: | |||
| 44 | void Clear() override; | 44 | void Clear() override; |
| 45 | void NotifyMaxwellRegisterChanged(u32 method) override; | 45 | void NotifyMaxwellRegisterChanged(u32 method) override; |
| 46 | void FlushAll() override; | 46 | void FlushAll() override; |
| 47 | void FlushRegion(Tegra::GPUVAddr addr, u64 size) override; | 47 | void FlushRegion(VAddr addr, u64 size) override; |
| 48 | void InvalidateRegion(Tegra::GPUVAddr addr, u64 size) override; | 48 | void InvalidateRegion(VAddr addr, u64 size) override; |
| 49 | void FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) override; | 49 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 50 | bool AccelerateDisplayTransfer(const void* config) override; | 50 | bool AccelerateDisplayTransfer(const void* config) override; |
| 51 | bool AccelerateTextureCopy(const void* config) override; | 51 | bool AccelerateTextureCopy(const void* config) override; |
| 52 | bool AccelerateFill(const void* config) override; | 52 | bool AccelerateFill(const void* config) override; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index e53a1a2ec..8cd4c2956 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -33,11 +33,16 @@ struct FormatTuple { | |||
| 33 | bool compressed; | 33 | bool compressed; |
| 34 | }; | 34 | }; |
| 35 | 35 | ||
| 36 | static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { | ||
| 37 | auto& gpu{Core::System::GetInstance().GPU()}; | ||
| 38 | const auto cpu_addr{gpu.MemoryManager().GpuToCpuAddress(gpu_addr)}; | ||
| 39 | return cpu_addr ? *cpu_addr : 0; | ||
| 40 | } | ||
| 41 | |||
| 36 | /*static*/ SurfaceParams SurfaceParams::CreateForTexture( | 42 | /*static*/ SurfaceParams SurfaceParams::CreateForTexture( |
| 37 | const Tegra::Texture::FullTextureInfo& config) { | 43 | const Tegra::Texture::FullTextureInfo& config) { |
| 38 | |||
| 39 | SurfaceParams params{}; | 44 | SurfaceParams params{}; |
| 40 | params.addr = config.tic.Address(); | 45 | params.addr = TryGetCpuAddr(config.tic.Address()); |
| 41 | params.is_tiled = config.tic.IsTiled(); | 46 | params.is_tiled = config.tic.IsTiled(); |
| 42 | params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, | 47 | params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, |
| 43 | params.pixel_format = | 48 | params.pixel_format = |
| @@ -55,9 +60,8 @@ struct FormatTuple { | |||
| 55 | 60 | ||
| 56 | /*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer( | 61 | /*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer( |
| 57 | const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) { | 62 | const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) { |
| 58 | |||
| 59 | SurfaceParams params{}; | 63 | SurfaceParams params{}; |
| 60 | params.addr = config.Address(); | 64 | params.addr = TryGetCpuAddr(config.Address()); |
| 61 | params.is_tiled = true; | 65 | params.is_tiled = true; |
| 62 | params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; | 66 | params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; |
| 63 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | 67 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); |
| @@ -75,9 +79,8 @@ struct FormatTuple { | |||
| 75 | /*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(u32 zeta_width, u32 zeta_height, | 79 | /*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(u32 zeta_width, u32 zeta_height, |
| 76 | Tegra::GPUVAddr zeta_address, | 80 | Tegra::GPUVAddr zeta_address, |
| 77 | Tegra::DepthFormat format) { | 81 | Tegra::DepthFormat format) { |
| 78 | |||
| 79 | SurfaceParams params{}; | 82 | SurfaceParams params{}; |
| 80 | params.addr = zeta_address; | 83 | params.addr = TryGetCpuAddr(zeta_address); |
| 81 | params.is_tiled = true; | 84 | params.is_tiled = true; |
| 82 | params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; | 85 | params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; |
| 83 | params.pixel_format = PixelFormatFromDepthFormat(format); | 86 | params.pixel_format = PixelFormatFromDepthFormat(format); |
| @@ -171,11 +174,6 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType | |||
| 171 | return format; | 174 | return format; |
| 172 | } | 175 | } |
| 173 | 176 | ||
| 174 | VAddr SurfaceParams::GetCpuAddr() const { | ||
| 175 | auto& gpu = Core::System::GetInstance().GPU(); | ||
| 176 | return *gpu.MemoryManager().GpuToCpuAddress(addr); | ||
| 177 | } | ||
| 178 | |||
| 179 | static bool IsPixelFormatASTC(PixelFormat format) { | 177 | static bool IsPixelFormatASTC(PixelFormat format) { |
| 180 | switch (format) { | 178 | switch (format) { |
| 181 | case PixelFormat::ASTC_2D_4X4: | 179 | case PixelFormat::ASTC_2D_4X4: |
| @@ -222,33 +220,28 @@ static bool IsFormatBCn(PixelFormat format) { | |||
| 222 | } | 220 | } |
| 223 | 221 | ||
| 224 | template <bool morton_to_gl, PixelFormat format> | 222 | template <bool morton_to_gl, PixelFormat format> |
| 225 | void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_buffer, | 223 | void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_buffer, VAddr addr) { |
| 226 | Tegra::GPUVAddr addr) { | ||
| 227 | constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; | 224 | constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; |
| 228 | constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); | 225 | constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); |
| 229 | auto& gpu = Core::System::GetInstance().GPU(); | ||
| 230 | 226 | ||
| 231 | if (morton_to_gl) { | 227 | if (morton_to_gl) { |
| 232 | // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual | 228 | // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual |
| 233 | // pixel values. | 229 | // pixel values. |
| 234 | const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; | 230 | const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; |
| 235 | const std::vector<u8> data = | 231 | const std::vector<u8> data = Tegra::Texture::UnswizzleTexture( |
| 236 | Tegra::Texture::UnswizzleTexture(*gpu.MemoryManager().GpuToCpuAddress(addr), tile_size, | 232 | addr, tile_size, bytes_per_pixel, stride, height, block_height); |
| 237 | bytes_per_pixel, stride, height, block_height); | ||
| 238 | const size_t size_to_copy{std::min(gl_buffer.size(), data.size())}; | 233 | const size_t size_to_copy{std::min(gl_buffer.size(), data.size())}; |
| 239 | gl_buffer.assign(data.begin(), data.begin() + size_to_copy); | 234 | gl_buffer.assign(data.begin(), data.begin() + size_to_copy); |
| 240 | } else { | 235 | } else { |
| 241 | // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should | 236 | // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should |
| 242 | // check the configuration for this and perform more generic un/swizzle | 237 | // check the configuration for this and perform more generic un/swizzle |
| 243 | LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); | 238 | LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); |
| 244 | VideoCore::MortonCopyPixels128( | 239 | VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, |
| 245 | stride, height, bytes_per_pixel, gl_bytes_per_pixel, | 240 | Memory::GetPointer(addr), gl_buffer.data(), morton_to_gl); |
| 246 | Memory::GetPointer(*gpu.MemoryManager().GpuToCpuAddress(addr)), gl_buffer.data(), | ||
| 247 | morton_to_gl); | ||
| 248 | } | 241 | } |
| 249 | } | 242 | } |
| 250 | 243 | ||
| 251 | static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), | 244 | static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr), |
| 252 | SurfaceParams::MaxPixelFormat> | 245 | SurfaceParams::MaxPixelFormat> |
| 253 | morton_to_gl_fns = { | 246 | morton_to_gl_fns = { |
| 254 | // clang-format off | 247 | // clang-format off |
| @@ -305,7 +298,7 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU | |||
| 305 | // clang-format on | 298 | // clang-format on |
| 306 | }; | 299 | }; |
| 307 | 300 | ||
| 308 | static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), | 301 | static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr), |
| 309 | SurfaceParams::MaxPixelFormat> | 302 | SurfaceParams::MaxPixelFormat> |
| 310 | gl_to_morton_fns = { | 303 | gl_to_morton_fns = { |
| 311 | // clang-format off | 304 | // clang-format off |
| @@ -542,7 +535,7 @@ MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64 | |||
| 542 | void CachedSurface::LoadGLBuffer() { | 535 | void CachedSurface::LoadGLBuffer() { |
| 543 | ASSERT(params.type != SurfaceType::Fill); | 536 | ASSERT(params.type != SurfaceType::Fill); |
| 544 | 537 | ||
| 545 | const u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr()); | 538 | const u8* const texture_src_data = Memory::GetPointer(params.addr); |
| 546 | 539 | ||
| 547 | ASSERT(texture_src_data); | 540 | ASSERT(texture_src_data); |
| 548 | 541 | ||
| @@ -567,7 +560,7 @@ void CachedSurface::LoadGLBuffer() { | |||
| 567 | 560 | ||
| 568 | MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); | 561 | MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); |
| 569 | void CachedSurface::FlushGLBuffer() { | 562 | void CachedSurface::FlushGLBuffer() { |
| 570 | u8* const dst_buffer = Memory::GetPointer(params.GetCpuAddr()); | 563 | u8* const dst_buffer = Memory::GetPointer(params.addr); |
| 571 | 564 | ||
| 572 | ASSERT(dst_buffer); | 565 | ASSERT(dst_buffer); |
| 573 | ASSERT(gl_buffer.size() == | 566 | ASSERT(gl_buffer.size() == |
| @@ -764,11 +757,6 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres | |||
| 764 | return {}; | 757 | return {}; |
| 765 | } | 758 | } |
| 766 | 759 | ||
| 767 | auto& gpu = Core::System::GetInstance().GPU(); | ||
| 768 | // Don't try to create any entries in the cache if the address of the texture is invalid. | ||
| 769 | if (gpu.MemoryManager().GpuToCpuAddress(params.addr) == boost::none) | ||
| 770 | return {}; | ||
| 771 | |||
| 772 | // Look up surface in the cache based on address | 760 | // Look up surface in the cache based on address |
| 773 | Surface surface{TryGet(params.addr)}; | 761 | Surface surface{TryGet(params.addr)}; |
| 774 | if (surface) { | 762 | if (surface) { |
| @@ -858,10 +846,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, | |||
| 858 | "reinterpretation but the texture is tiled."); | 846 | "reinterpretation but the texture is tiled."); |
| 859 | } | 847 | } |
| 860 | size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes(); | 848 | size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes(); |
| 861 | auto address = Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress( | ||
| 862 | new_params.addr + params.SizeInBytes()); | ||
| 863 | std::vector<u8> data(remaining_size); | 849 | std::vector<u8> data(remaining_size); |
| 864 | Memory::ReadBlock(*address, data.data(), data.size()); | 850 | Memory::ReadBlock(new_params.addr + params.SizeInBytes(), data.data(), data.size()); |
| 865 | glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size, data.data()); | 851 | glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size, data.data()); |
| 866 | } | 852 | } |
| 867 | 853 | ||
| @@ -888,30 +874,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, | |||
| 888 | return new_surface; | 874 | return new_surface; |
| 889 | } | 875 | } |
| 890 | 876 | ||
| 891 | Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const { | 877 | Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const { |
| 892 | // Tries to find the GPU address of a framebuffer based on the CPU address. This is because | 878 | return TryGet(addr); |
| 893 | // final output framebuffers are specified by CPU address, but internally our GPU cache uses | ||
| 894 | // GPU addresses. We iterate through all cached framebuffers, and compare their starting CPU | ||
| 895 | // address to the one provided. This is obviously not great, and won't work if the | ||
| 896 | // framebuffer overlaps surfaces. | ||
| 897 | |||
| 898 | std::vector<Surface> surfaces; | ||
| 899 | for (const auto& surface : GetCache()) { | ||
| 900 | const auto& params = surface.second->GetSurfaceParams(); | ||
| 901 | const VAddr surface_cpu_addr = params.GetCpuAddr(); | ||
| 902 | if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + params.size_in_bytes)) { | ||
| 903 | ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported"); | ||
| 904 | surfaces.push_back(surface.second); | ||
| 905 | } | ||
| 906 | } | ||
| 907 | |||
| 908 | if (surfaces.empty()) { | ||
| 909 | return {}; | ||
| 910 | } | ||
| 911 | |||
| 912 | ASSERT_MSG(surfaces.size() == 1, ">1 surface is unsupported"); | ||
| 913 | |||
| 914 | return surfaces[0]; | ||
| 915 | } | 879 | } |
| 916 | 880 | ||
| 917 | void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { | 881 | void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index ecdd8d8e5..f381e735f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -638,9 +638,6 @@ struct SurfaceParams { | |||
| 638 | GetFormatBpp(pixel_format) / CHAR_BIT; | 638 | GetFormatBpp(pixel_format) / CHAR_BIT; |
| 639 | } | 639 | } |
| 640 | 640 | ||
| 641 | /// Returns the CPU virtual address for this surface | ||
| 642 | VAddr GetCpuAddr() const; | ||
| 643 | |||
| 644 | /// Creates SurfaceParams from a texture configuration | 641 | /// Creates SurfaceParams from a texture configuration |
| 645 | static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); | 642 | static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); |
| 646 | 643 | ||
| @@ -671,7 +668,7 @@ struct SurfaceParams { | |||
| 671 | std::tie(other.pixel_format, other.type, other.cache_width, other.cache_height); | 668 | std::tie(other.pixel_format, other.type, other.cache_width, other.cache_height); |
| 672 | } | 669 | } |
| 673 | 670 | ||
| 674 | Tegra::GPUVAddr addr; | 671 | VAddr addr; |
| 675 | bool is_tiled; | 672 | bool is_tiled; |
| 676 | u32 block_height; | 673 | u32 block_height; |
| 677 | PixelFormat pixel_format; | 674 | PixelFormat pixel_format; |
| @@ -712,7 +709,7 @@ class CachedSurface final { | |||
| 712 | public: | 709 | public: |
| 713 | CachedSurface(const SurfaceParams& params); | 710 | CachedSurface(const SurfaceParams& params); |
| 714 | 711 | ||
| 715 | Tegra::GPUVAddr GetAddr() const { | 712 | VAddr GetAddr() const { |
| 716 | return params.addr; | 713 | return params.addr; |
| 717 | } | 714 | } |
| 718 | 715 | ||
| @@ -763,8 +760,8 @@ public: | |||
| 763 | /// Flushes the surface to Switch memory | 760 | /// Flushes the surface to Switch memory |
| 764 | void FlushSurface(const Surface& surface); | 761 | void FlushSurface(const Surface& surface); |
| 765 | 762 | ||
| 766 | /// Tries to find a framebuffer GPU address based on the provided CPU address | 763 | /// Tries to find a framebuffer using on the provided CPU address |
| 767 | Surface TryFindFramebufferSurface(VAddr cpu_addr) const; | 764 | Surface TryFindFramebufferSurface(VAddr addr) const; |
| 768 | 765 | ||
| 769 | private: | 766 | private: |
| 770 | void LoadSurface(const Surface& surface); | 767 | void LoadSurface(const Surface& surface); |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 326a901ba..ac9adfd83 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -12,21 +12,17 @@ | |||
| 12 | namespace OpenGL { | 12 | namespace OpenGL { |
| 13 | 13 | ||
| 14 | /// Gets the address for the specified shader stage program | 14 | /// Gets the address for the specified shader stage program |
| 15 | static Tegra::GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) { | 15 | static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { |
| 16 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 16 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 17 | auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)]; | 17 | auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)]; |
| 18 | | 18 | return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() + |
| 19 | return gpu.regs.code_address.CodeAddress() + shader_config.offset; | 19 | shader_config.offset); |
| 20 | } | 20 | } |
| 21 | 21 | ||
| 22 | /// Gets the shader program code from memory for the specified address | 22 | /// Gets the shader program code from memory for the specified address |
| 23 | static GLShader::ProgramCode GetShaderCode(Tegra::GPUVAddr addr) { | 23 | static GLShader::ProgramCode GetShaderCode(VAddr addr) { |
| 24 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | ||
| 25 | |||
| 26 | GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH); | 24 | GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH); |
| 27 | const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(addr)}; | 25 | Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); |
| 28 | Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64)); | ||
| 29 | |||
| 30 | return program_code; | 26 | return program_code; |
| 31 | } | 27 | } |
| 32 | 28 | ||
| @@ -55,7 +51,7 @@ static void SetShaderUniformBlockBindings(GLuint shader) { | |||
| 55 | sizeof(GLShader::MaxwellUniformData)); | 51 | sizeof(GLShader::MaxwellUniformData)); |
| 56 | } | 52 | } |
| 57 | 53 | ||
| 58 | CachedShader::CachedShader(Tegra::GPUVAddr addr, Maxwell::ShaderProgram program_type) | 54 | CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) |
| 59 | : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} { | 55 | : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} { |
| 60 | 56 | ||
| 61 | GLShader::ProgramResult program_result; | 57 | GLShader::ProgramResult program_result; |
| @@ -113,7 +109,7 @@ GLint CachedShader::GetUniformLocation(const std::string& name) { | |||
| 113 | } | 109 | } |
| 114 | 110 | ||
| 115 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | 111 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { |
| 116 | const Tegra::GPUVAddr program_addr{GetShaderAddress(program)}; | 112 | const VAddr program_addr{GetShaderAddress(program)}; |
| 117 | 113 | ||
| 118 | // Look up shader in the cache based on address | 114 | // Look up shader in the cache based on address |
| 119 | Shader shader{TryGet(program_addr)}; | 115 | Shader shader{TryGet(program_addr)}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 658f9e994..759987604 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -8,7 +8,6 @@ | |||
| 8 | #include <unordered_map> | 8 | #include <unordered_map> |
| 9 | 9 | ||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/memory_manager.h" | ||
| 12 | #include "video_core/rasterizer_cache.h" | 11 | #include "video_core/rasterizer_cache.h" |
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 14 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 13 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| @@ -21,10 +20,10 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; | |||
| 21 | 20 | ||
| 22 | class CachedShader final { | 21 | class CachedShader final { |
| 23 | public: | 22 | public: |
| 24 | CachedShader(Tegra::GPUVAddr addr, Maxwell::ShaderProgram program_type); | 23 | CachedShader(VAddr addr, Maxwell::ShaderProgram program_type); |
| 25 | 24 | ||
| 26 | /// Gets the address of the shader in guest memory, required for cache management | 25 | /// Gets the address of the shader in guest memory, required for cache management |
| 27 | Tegra::GPUVAddr GetAddr() const { | 26 | VAddr GetAddr() const { |
| 28 | return addr; | 27 | return addr; |
| 29 | } | 28 | } |
| 30 | 29 | ||
| @@ -50,7 +49,7 @@ public: | |||
| 50 | GLint GetUniformLocation(const std::string& name); | 49 | GLint GetUniformLocation(const std::string& name); |
| 51 | 50 | ||
| 52 | private: | 51 | private: |
| 53 | Tegra::GPUVAddr addr; | 52 | VAddr addr; |
| 54 | Maxwell::ShaderProgram program_type; | 53 | Maxwell::ShaderProgram program_type; |
| 55 | GLShader::ShaderSetup setup; | 54 | GLShader::ShaderSetup setup; |
| 56 | GLShader::ShaderEntries entries; | 55 | GLShader::ShaderEntries entries; |