diff options
| author | 2018-08-31 13:24:21 -0400 | |
|---|---|---|
| committer | 2018-08-31 13:24:21 -0400 | |
| commit | 42588493d5ad5d824fc557ac936e64e5e7fd7e44 (patch) | |
| tree | d06ab2b0bd2f426a8fb129cea437ed99e29ed5e9 /src | |
| parent | Implement BC6H_UF16 & BC6H_SF16 (#1092) (diff) | |
| parent | gl_rasterizer_cache: Use accurate framebuffer setting for accurate copies. (diff) | |
| download | yuzu-42588493d5ad5d824fc557ac936e64e5e7fd7e44.tar.gz yuzu-42588493d5ad5d824fc557ac936e64e5e7fd7e44.tar.xz yuzu-42588493d5ad5d824fc557ac936e64e5e7fd7e44.zip | |
Merge pull request #1205 from bunnei/improve-rasterizer-cache-2
Various fixes and improvements to rasterizer cache 2: Electric Boogaloo
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_gpu.h | 1 | ||||
| -rw-r--r-- | src/core/memory.cpp | 51 | ||||
| -rw-r--r-- | src/core/memory.h | 3 | ||||
| -rw-r--r-- | src/video_core/rasterizer_cache.h | 124 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 9 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 61 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 14 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 210 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 26 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 18 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 7 |
11 files changed, 227 insertions, 297 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index 650ed8fbc..03b7356d0 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "common/swap.h" | 11 | #include "common/swap.h" |
| 12 | #include "core/hle/service/nvdrv/devices/nvdevice.h" | 12 | #include "core/hle/service/nvdrv/devices/nvdevice.h" |
| 13 | #include "video_core/memory_manager.h" | ||
| 13 | 14 | ||
| 14 | namespace Service::Nvidia::Devices { | 15 | namespace Service::Nvidia::Devices { |
| 15 | 16 | ||
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index bc34bfd6d..0e4e0157c 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -251,8 +251,8 @@ std::string ReadCString(VAddr vaddr, std::size_t max_length) { | |||
| 251 | return string; | 251 | return string; |
| 252 | } | 252 | } |
| 253 | 253 | ||
| 254 | void RasterizerMarkRegionCached(Tegra::GPUVAddr gpu_addr, u64 size, bool cached) { | 254 | void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) { |
| 255 | if (gpu_addr == 0) { | 255 | if (vaddr == 0) { |
| 256 | return; | 256 | return; |
| 257 | } | 257 | } |
| 258 | 258 | ||
| @@ -261,19 +261,8 @@ void RasterizerMarkRegionCached(Tegra::GPUVAddr gpu_addr, u64 size, bool cached) | |||
| 261 | // CPU pages, hence why we iterate on a CPU page basis (note: GPU page size is different). This | 261 | // CPU pages, hence why we iterate on a CPU page basis (note: GPU page size is different). This |
| 262 | // assumes the specified GPU address region is contiguous as well. | 262 | // assumes the specified GPU address region is contiguous as well. |
| 263 | 263 | ||
| 264 | u64 num_pages = ((gpu_addr + size - 1) >> PAGE_BITS) - (gpu_addr >> PAGE_BITS) + 1; | 264 | u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1; |
| 265 | for (unsigned i = 0; i < num_pages; ++i, gpu_addr += PAGE_SIZE) { | 265 | for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) { |
| 266 | boost::optional<VAddr> maybe_vaddr = | ||
| 267 | Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(gpu_addr); | ||
| 268 | // The GPU <-> CPU virtual memory mapping is not 1:1 | ||
| 269 | if (!maybe_vaddr) { | ||
| 270 | LOG_ERROR(HW_Memory, | ||
| 271 | "Trying to flush a cached region to an invalid physical address {:016X}", | ||
| 272 | gpu_addr); | ||
| 273 | continue; | ||
| 274 | } | ||
| 275 | VAddr vaddr = *maybe_vaddr; | ||
| 276 | |||
| 277 | PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; | 266 | PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; |
| 278 | 267 | ||
| 279 | if (cached) { | 268 | if (cached) { |
| @@ -344,29 +333,19 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) { | |||
| 344 | 333 | ||
| 345 | const VAddr overlap_start = std::max(start, region_start); | 334 | const VAddr overlap_start = std::max(start, region_start); |
| 346 | const VAddr overlap_end = std::min(end, region_end); | 335 | const VAddr overlap_end = std::min(end, region_end); |
| 347 | |||
| 348 | const std::vector<Tegra::GPUVAddr> gpu_addresses = | ||
| 349 | system_instance.GPU().MemoryManager().CpuToGpuAddress(overlap_start); | ||
| 350 | |||
| 351 | if (gpu_addresses.empty()) { | ||
| 352 | return; | ||
| 353 | } | ||
| 354 | |||
| 355 | const u64 overlap_size = overlap_end - overlap_start; | 336 | const u64 overlap_size = overlap_end - overlap_start; |
| 356 | 337 | ||
| 357 | for (const auto& gpu_address : gpu_addresses) { | 338 | auto& rasterizer = system_instance.Renderer().Rasterizer(); |
| 358 | auto& rasterizer = system_instance.Renderer().Rasterizer(); | 339 | switch (mode) { |
| 359 | switch (mode) { | 340 | case FlushMode::Flush: |
| 360 | case FlushMode::Flush: | 341 | rasterizer.FlushRegion(overlap_start, overlap_size); |
| 361 | rasterizer.FlushRegion(gpu_address, overlap_size); | 342 | break; |
| 362 | break; | 343 | case FlushMode::Invalidate: |
| 363 | case FlushMode::Invalidate: | 344 | rasterizer.InvalidateRegion(overlap_start, overlap_size); |
| 364 | rasterizer.InvalidateRegion(gpu_address, overlap_size); | 345 | break; |
| 365 | break; | 346 | case FlushMode::FlushAndInvalidate: |
| 366 | case FlushMode::FlushAndInvalidate: | 347 | rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size); |
| 367 | rasterizer.FlushAndInvalidateRegion(gpu_address, overlap_size); | 348 | break; |
| 368 | break; | ||
| 369 | } | ||
| 370 | } | 349 | } |
| 371 | }; | 350 | }; |
| 372 | 351 | ||
diff --git a/src/core/memory.h b/src/core/memory.h index b7fb3b9ed..f06e04a75 100644 --- a/src/core/memory.h +++ b/src/core/memory.h | |||
| @@ -11,7 +11,6 @@ | |||
| 11 | #include <boost/icl/interval_map.hpp> | 11 | #include <boost/icl/interval_map.hpp> |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "core/memory_hook.h" | 13 | #include "core/memory_hook.h" |
| 14 | #include "video_core/memory_manager.h" | ||
| 15 | 14 | ||
| 16 | namespace Kernel { | 15 | namespace Kernel { |
| 17 | class Process; | 16 | class Process; |
| @@ -179,7 +178,7 @@ enum class FlushMode { | |||
| 179 | /** | 178 | /** |
| 180 | * Mark each page touching the region as cached. | 179 | * Mark each page touching the region as cached. |
| 181 | */ | 180 | */ |
| 182 | void RasterizerMarkRegionCached(Tegra::GPUVAddr gpu_addr, u64 size, bool cached); | 181 | void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached); |
| 183 | 182 | ||
| 184 | /** | 183 | /** |
| 185 | * Flushes and invalidates any externally cached rasterizer resources touching the given virtual | 184 | * Flushes and invalidates any externally cached rasterizer resources touching the given virtual |
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index 7a0492a4e..de1eab86b 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h | |||
| @@ -4,113 +4,87 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <unordered_map> | 7 | #include <set> |
| 8 | |||
| 8 | #include <boost/icl/interval_map.hpp> | 9 | #include <boost/icl/interval_map.hpp> |
| 9 | #include <boost/range/iterator_range.hpp> | ||
| 10 | 10 | ||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "core/core.h" | ||
| 12 | #include "core/memory.h" | 13 | #include "core/memory.h" |
| 13 | #include "video_core/memory_manager.h" | 14 | #include "video_core/memory_manager.h" |
| 15 | #include "video_core/rasterizer_interface.h" | ||
| 16 | #include "video_core/renderer_base.h" | ||
| 14 | 17 | ||
| 15 | template <class T> | 18 | template <class T> |
| 16 | class RasterizerCache : NonCopyable { | 19 | class RasterizerCache : NonCopyable { |
| 17 | public: | 20 | public: |
| 18 | /// Mark the specified region as being invalidated | 21 | /// Mark the specified region as being invalidated |
| 19 | void InvalidateRegion(Tegra::GPUVAddr region_addr, size_t region_size) { | 22 | void InvalidateRegion(VAddr addr, u64 size) { |
| 20 | for (auto iter = cached_objects.cbegin(); iter != cached_objects.cend();) { | 23 | if (size == 0) |
| 21 | const auto& object{iter->second}; | 24 | return; |
| 22 | 25 | ||
| 23 | ++iter; | 26 | const ObjectInterval interval{addr, addr + size}; |
| 27 | for (auto& pair : boost::make_iterator_range(object_cache.equal_range(interval))) { | ||
| 28 | for (auto& cached_object : pair.second) { | ||
| 29 | if (!cached_object) | ||
| 30 | continue; | ||
| 24 | 31 | ||
| 25 | if (object->GetAddr() <= (region_addr + region_size) && | 32 | remove_objects.emplace(cached_object); |
| 26 | region_addr <= (object->GetAddr() + object->GetSizeInBytes())) { | ||
| 27 | // Regions overlap, so invalidate | ||
| 28 | Unregister(object); | ||
| 29 | } | 33 | } |
| 30 | } | 34 | } |
| 35 | |||
| 36 | for (auto& remove_object : remove_objects) { | ||
| 37 | Unregister(remove_object); | ||
| 38 | } | ||
| 39 | |||
| 40 | remove_objects.clear(); | ||
| 41 | } | ||
| 42 | |||
| 43 | /// Invalidates everything in the cache | ||
| 44 | void InvalidateAll() { | ||
| 45 | while (object_cache.begin() != object_cache.end()) { | ||
| 46 | Unregister(*object_cache.begin()->second.begin()); | ||
| 47 | } | ||
| 31 | } | 48 | } |
| 32 | 49 | ||
| 33 | protected: | 50 | protected: |
| 34 | /// Tries to get an object from the cache with the specified address | 51 | /// Tries to get an object from the cache with the specified address |
| 35 | T TryGet(Tegra::GPUVAddr addr) const { | 52 | T TryGet(VAddr addr) const { |
| 36 | const auto& search{cached_objects.find(addr)}; | 53 | const ObjectInterval interval{addr}; |
| 37 | if (search != cached_objects.end()) { | 54 | for (auto& pair : boost::make_iterator_range(object_cache.equal_range(interval))) { |
| 38 | return search->second; | 55 | for (auto& cached_object : pair.second) { |
| 56 | if (cached_object->GetAddr() == addr) { | ||
| 57 | return cached_object; | ||
| 58 | } | ||
| 59 | } | ||
| 39 | } | 60 | } |
| 40 | |||
| 41 | return nullptr; | 61 | return nullptr; |
| 42 | } | 62 | } |
| 43 | 63 | ||
| 44 | /// Gets a reference to the cache | ||
| 45 | const std::unordered_map<Tegra::GPUVAddr, T>& GetCache() const { | ||
| 46 | return cached_objects; | ||
| 47 | } | ||
| 48 | |||
| 49 | /// Register an object into the cache | 64 | /// Register an object into the cache |
| 50 | void Register(const T& object) { | 65 | void Register(const T& object) { |
| 51 | const auto& search{cached_objects.find(object->GetAddr())}; | 66 | object_cache.add({GetInterval(object), ObjectSet{object}}); |
| 52 | if (search != cached_objects.end()) { | 67 | auto& rasterizer = Core::System::GetInstance().Renderer().Rasterizer(); |
| 53 | // Registered already | 68 | rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1); |
| 54 | return; | ||
| 55 | } | ||
| 56 | |||
| 57 | cached_objects[object->GetAddr()] = object; | ||
| 58 | UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1); | ||
| 59 | } | 69 | } |
| 60 | 70 | ||
| 61 | /// Unregisters an object from the cache | 71 | /// Unregisters an object from the cache |
| 62 | void Unregister(const T& object) { | 72 | void Unregister(const T& object) { |
| 63 | const auto& search{cached_objects.find(object->GetAddr())}; | 73 | auto& rasterizer = Core::System::GetInstance().Renderer().Rasterizer(); |
| 64 | if (search == cached_objects.end()) { | 74 | rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1); |
| 65 | // Unregistered already | 75 | object_cache.subtract({GetInterval(object), ObjectSet{object}}); |
| 66 | return; | ||
| 67 | } | ||
| 68 | |||
| 69 | UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1); | ||
| 70 | cached_objects.erase(search); | ||
| 71 | } | 76 | } |
| 72 | 77 | ||
| 73 | private: | 78 | private: |
| 74 | using PageMap = boost::icl::interval_map<u64, int>; | 79 | using ObjectSet = std::set<T>; |
| 75 | 80 | using ObjectCache = boost::icl::interval_map<VAddr, ObjectSet>; | |
| 76 | template <typename Map, typename Interval> | 81 | using ObjectInterval = typename ObjectCache::interval_type; |
| 77 | constexpr auto RangeFromInterval(Map& map, const Interval& interval) { | ||
| 78 | return boost::make_iterator_range(map.equal_range(interval)); | ||
| 79 | } | ||
| 80 | |||
| 81 | /// Increase/decrease the number of object in pages touching the specified region | ||
| 82 | void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { | ||
| 83 | const u64 page_start{addr >> Tegra::MemoryManager::PAGE_BITS}; | ||
| 84 | const u64 page_end{(addr + size) >> Tegra::MemoryManager::PAGE_BITS}; | ||
| 85 | |||
| 86 | // Interval maps will erase segments if count reaches 0, so if delta is negative we have to | ||
| 87 | // subtract after iterating | ||
| 88 | const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); | ||
| 89 | if (delta > 0) | ||
| 90 | cached_pages.add({pages_interval, delta}); | ||
| 91 | |||
| 92 | for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { | ||
| 93 | const auto interval = pair.first & pages_interval; | ||
| 94 | const int count = pair.second; | ||
| 95 | |||
| 96 | const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval) | ||
| 97 | << Tegra::MemoryManager::PAGE_BITS; | ||
| 98 | const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval) | ||
| 99 | << Tegra::MemoryManager::PAGE_BITS; | ||
| 100 | const u64 interval_size = interval_end_addr - interval_start_addr; | ||
| 101 | |||
| 102 | if (delta > 0 && count == delta) | ||
| 103 | Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); | ||
| 104 | else if (delta < 0 && count == -delta) | ||
| 105 | Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); | ||
| 106 | else | ||
| 107 | ASSERT(count >= 0); | ||
| 108 | } | ||
| 109 | 82 | ||
| 110 | if (delta < 0) | 83 | static auto GetInterval(const T& object) { |
| 111 | cached_pages.add({pages_interval, delta}); | 84 | return ObjectInterval::right_open(object->GetAddr(), |
| 85 | object->GetAddr() + object->GetSizeInBytes()); | ||
| 112 | } | 86 | } |
| 113 | 87 | ||
| 114 | std::unordered_map<Tegra::GPUVAddr, T> cached_objects; | 88 | ObjectCache object_cache; |
| 115 | PageMap cached_pages; | 89 | ObjectSet remove_objects; |
| 116 | }; | 90 | }; |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index a4a219d8d..9d78e8b6b 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -27,14 +27,14 @@ public: | |||
| 27 | virtual void FlushAll() = 0; | 27 | virtual void FlushAll() = 0; |
| 28 | 28 | ||
| 29 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 29 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 30 | virtual void FlushRegion(Tegra::GPUVAddr addr, u64 size) = 0; | 30 | virtual void FlushRegion(VAddr addr, u64 size) = 0; |
| 31 | 31 | ||
| 32 | /// Notify rasterizer that any caches of the specified region should be invalidated | 32 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 33 | virtual void InvalidateRegion(Tegra::GPUVAddr addr, u64 size) = 0; | 33 | virtual void InvalidateRegion(VAddr addr, u64 size) = 0; |
| 34 | 34 | ||
| 35 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 35 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 36 | /// and invalidated | 36 | /// and invalidated |
| 37 | virtual void FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) = 0; | 37 | virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; |
| 38 | 38 | ||
| 39 | /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0 | 39 | /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0 |
| 40 | virtual bool AccelerateDisplayTransfer(const void* config) { | 40 | virtual bool AccelerateDisplayTransfer(const void* config) { |
| @@ -60,5 +60,8 @@ public: | |||
| 60 | virtual bool AccelerateDrawBatch(bool is_indexed) { | 60 | virtual bool AccelerateDrawBatch(bool is_indexed) { |
| 61 | return false; | 61 | return false; |
| 62 | } | 62 | } |
| 63 | |||
| 64 | /// Increase/decrease the number of object in pages touching the specified region | ||
| 65 | virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {} | ||
| 63 | }; | 66 | }; |
| 64 | } // namespace VideoCore | 67 | } // namespace VideoCore |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f014183b8..7ce969f73 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -274,6 +274,41 @@ bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { | |||
| 274 | return true; | 274 | return true; |
| 275 | } | 275 | } |
| 276 | 276 | ||
| 277 | template <typename Map, typename Interval> | ||
| 278 | static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { | ||
| 279 | return boost::make_iterator_range(map.equal_range(interval)); | ||
| 280 | } | ||
| 281 | |||
| 282 | void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | ||
| 283 | const u64 page_start{addr >> Memory::PAGE_BITS}; | ||
| 284 | const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; | ||
| 285 | |||
| 286 | // Interval maps will erase segments if count reaches 0, so if delta is negative we have to | ||
| 287 | // subtract after iterating | ||
| 288 | const auto pages_interval = CachedPageMap::interval_type::right_open(page_start, page_end); | ||
| 289 | if (delta > 0) | ||
| 290 | cached_pages.add({pages_interval, delta}); | ||
| 291 | |||
| 292 | for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { | ||
| 293 | const auto interval = pair.first & pages_interval; | ||
| 294 | const int count = pair.second; | ||
| 295 | |||
| 296 | const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; | ||
| 297 | const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; | ||
| 298 | const u64 interval_size = interval_end_addr - interval_start_addr; | ||
| 299 | |||
| 300 | if (delta > 0 && count == delta) | ||
| 301 | Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); | ||
| 302 | else if (delta < 0 && count == -delta) | ||
| 303 | Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); | ||
| 304 | else | ||
| 305 | ASSERT(count >= 0); | ||
| 306 | } | ||
| 307 | |||
| 308 | if (delta < 0) | ||
| 309 | cached_pages.add({pages_interval, delta}); | ||
| 310 | } | ||
| 311 | |||
| 277 | std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, | 312 | std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, |
| 278 | bool using_depth_fb, | 313 | bool using_depth_fb, |
| 279 | bool preserve_contents) { | 314 | bool preserve_contents) { |
| @@ -397,16 +432,6 @@ void RasterizerOpenGL::Clear() { | |||
| 397 | glClearStencil(regs.clear_stencil); | 432 | glClearStencil(regs.clear_stencil); |
| 398 | 433 | ||
| 399 | glClear(clear_mask); | 434 | glClear(clear_mask); |
| 400 | |||
| 401 | // Mark framebuffer surfaces as dirty | ||
| 402 | if (Settings::values.use_accurate_framebuffers) { | ||
| 403 | if (dirty_color_surface != nullptr) { | ||
| 404 | res_cache.FlushSurface(dirty_color_surface); | ||
| 405 | } | ||
| 406 | if (dirty_depth_surface != nullptr) { | ||
| 407 | res_cache.FlushSurface(dirty_depth_surface); | ||
| 408 | } | ||
| 409 | } | ||
| 410 | } | 435 | } |
| 411 | 436 | ||
| 412 | std::pair<u8*, GLintptr> RasterizerOpenGL::AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, | 437 | std::pair<u8*, GLintptr> RasterizerOpenGL::AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, |
| @@ -522,16 +547,6 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 522 | texture_unit.Unbind(); | 547 | texture_unit.Unbind(); |
| 523 | } | 548 | } |
| 524 | state.Apply(); | 549 | state.Apply(); |
| 525 | |||
| 526 | // Mark framebuffer surfaces as dirty | ||
| 527 | if (Settings::values.use_accurate_framebuffers) { | ||
| 528 | if (dirty_color_surface != nullptr) { | ||
| 529 | res_cache.FlushSurface(dirty_color_surface); | ||
| 530 | } | ||
| 531 | if (dirty_depth_surface != nullptr) { | ||
| 532 | res_cache.FlushSurface(dirty_depth_surface); | ||
| 533 | } | ||
| 534 | } | ||
| 535 | } | 550 | } |
| 536 | 551 | ||
| 537 | void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {} | 552 | void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {} |
| @@ -540,17 +555,17 @@ void RasterizerOpenGL::FlushAll() { | |||
| 540 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 555 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 541 | } | 556 | } |
| 542 | 557 | ||
| 543 | void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { | 558 | void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { |
| 544 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 559 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 545 | } | 560 | } |
| 546 | 561 | ||
| 547 | void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { | 562 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { |
| 548 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 563 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 549 | res_cache.InvalidateRegion(addr, size); | 564 | res_cache.InvalidateRegion(addr, size); |
| 550 | shader_cache.InvalidateRegion(addr, size); | 565 | shader_cache.InvalidateRegion(addr, size); |
| 551 | } | 566 | } |
| 552 | 567 | ||
| 553 | void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { | 568 | void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 554 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 569 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 555 | InvalidateRegion(addr, size); | 570 | InvalidateRegion(addr, size); |
| 556 | } | 571 | } |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 7dd329efe..30045ebff 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -10,7 +10,11 @@ | |||
| 10 | #include <tuple> | 10 | #include <tuple> |
| 11 | #include <utility> | 11 | #include <utility> |
| 12 | #include <vector> | 12 | #include <vector> |
| 13 | |||
| 14 | #include <boost/icl/interval_map.hpp> | ||
| 15 | #include <boost/range/iterator_range.hpp> | ||
| 13 | #include <glad/glad.h> | 16 | #include <glad/glad.h> |
| 17 | |||
| 14 | #include "common/common_types.h" | 18 | #include "common/common_types.h" |
| 15 | #include "video_core/engines/maxwell_3d.h" | 19 | #include "video_core/engines/maxwell_3d.h" |
| 16 | #include "video_core/memory_manager.h" | 20 | #include "video_core/memory_manager.h" |
| @@ -40,15 +44,16 @@ public: | |||
| 40 | void Clear() override; | 44 | void Clear() override; |
| 41 | void NotifyMaxwellRegisterChanged(u32 method) override; | 45 | void NotifyMaxwellRegisterChanged(u32 method) override; |
| 42 | void FlushAll() override; | 46 | void FlushAll() override; |
| 43 | void FlushRegion(Tegra::GPUVAddr addr, u64 size) override; | 47 | void FlushRegion(VAddr addr, u64 size) override; |
| 44 | void InvalidateRegion(Tegra::GPUVAddr addr, u64 size) override; | 48 | void InvalidateRegion(VAddr addr, u64 size) override; |
| 45 | void FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) override; | 49 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 46 | bool AccelerateDisplayTransfer(const void* config) override; | 50 | bool AccelerateDisplayTransfer(const void* config) override; |
| 47 | bool AccelerateTextureCopy(const void* config) override; | 51 | bool AccelerateTextureCopy(const void* config) override; |
| 48 | bool AccelerateFill(const void* config) override; | 52 | bool AccelerateFill(const void* config) override; |
| 49 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 53 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 50 | u32 pixel_stride) override; | 54 | u32 pixel_stride) override; |
| 51 | bool AccelerateDrawBatch(bool is_indexed) override; | 55 | bool AccelerateDrawBatch(bool is_indexed) override; |
| 56 | void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override; | ||
| 52 | 57 | ||
| 53 | /// OpenGL shader generated for a given Maxwell register state | 58 | /// OpenGL shader generated for a given Maxwell register state |
| 54 | struct MaxwellShader { | 59 | struct MaxwellShader { |
| @@ -187,6 +192,9 @@ private: | |||
| 187 | 192 | ||
| 188 | enum class AccelDraw { Disabled, Arrays, Indexed }; | 193 | enum class AccelDraw { Disabled, Arrays, Indexed }; |
| 189 | AccelDraw accelerate_draw = AccelDraw::Disabled; | 194 | AccelDraw accelerate_draw = AccelDraw::Disabled; |
| 195 | |||
| 196 | using CachedPageMap = boost::icl::interval_map<u64, int>; | ||
| 197 | CachedPageMap cached_pages; | ||
| 190 | }; | 198 | }; |
| 191 | 199 | ||
| 192 | } // namespace OpenGL | 200 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index e53a1a2ec..1965ab7d5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -33,11 +33,16 @@ struct FormatTuple { | |||
| 33 | bool compressed; | 33 | bool compressed; |
| 34 | }; | 34 | }; |
| 35 | 35 | ||
| 36 | static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { | ||
| 37 | auto& gpu{Core::System::GetInstance().GPU()}; | ||
| 38 | const auto cpu_addr{gpu.MemoryManager().GpuToCpuAddress(gpu_addr)}; | ||
| 39 | return cpu_addr ? *cpu_addr : 0; | ||
| 40 | } | ||
| 41 | |||
| 36 | /*static*/ SurfaceParams SurfaceParams::CreateForTexture( | 42 | /*static*/ SurfaceParams SurfaceParams::CreateForTexture( |
| 37 | const Tegra::Texture::FullTextureInfo& config) { | 43 | const Tegra::Texture::FullTextureInfo& config) { |
| 38 | |||
| 39 | SurfaceParams params{}; | 44 | SurfaceParams params{}; |
| 40 | params.addr = config.tic.Address(); | 45 | params.addr = TryGetCpuAddr(config.tic.Address()); |
| 41 | params.is_tiled = config.tic.IsTiled(); | 46 | params.is_tiled = config.tic.IsTiled(); |
| 42 | params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, | 47 | params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, |
| 43 | params.pixel_format = | 48 | params.pixel_format = |
| @@ -55,9 +60,8 @@ struct FormatTuple { | |||
| 55 | 60 | ||
| 56 | /*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer( | 61 | /*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer( |
| 57 | const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) { | 62 | const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) { |
| 58 | |||
| 59 | SurfaceParams params{}; | 63 | SurfaceParams params{}; |
| 60 | params.addr = config.Address(); | 64 | params.addr = TryGetCpuAddr(config.Address()); |
| 61 | params.is_tiled = true; | 65 | params.is_tiled = true; |
| 62 | params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; | 66 | params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; |
| 63 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); | 67 | params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); |
| @@ -75,9 +79,8 @@ struct FormatTuple { | |||
| 75 | /*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(u32 zeta_width, u32 zeta_height, | 79 | /*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(u32 zeta_width, u32 zeta_height, |
| 76 | Tegra::GPUVAddr zeta_address, | 80 | Tegra::GPUVAddr zeta_address, |
| 77 | Tegra::DepthFormat format) { | 81 | Tegra::DepthFormat format) { |
| 78 | |||
| 79 | SurfaceParams params{}; | 82 | SurfaceParams params{}; |
| 80 | params.addr = zeta_address; | 83 | params.addr = TryGetCpuAddr(zeta_address); |
| 81 | params.is_tiled = true; | 84 | params.is_tiled = true; |
| 82 | params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; | 85 | params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; |
| 83 | params.pixel_format = PixelFormatFromDepthFormat(format); | 86 | params.pixel_format = PixelFormatFromDepthFormat(format); |
| @@ -171,11 +174,6 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType | |||
| 171 | return format; | 174 | return format; |
| 172 | } | 175 | } |
| 173 | 176 | ||
| 174 | VAddr SurfaceParams::GetCpuAddr() const { | ||
| 175 | auto& gpu = Core::System::GetInstance().GPU(); | ||
| 176 | return *gpu.MemoryManager().GpuToCpuAddress(addr); | ||
| 177 | } | ||
| 178 | |||
| 179 | static bool IsPixelFormatASTC(PixelFormat format) { | 177 | static bool IsPixelFormatASTC(PixelFormat format) { |
| 180 | switch (format) { | 178 | switch (format) { |
| 181 | case PixelFormat::ASTC_2D_4X4: | 179 | case PixelFormat::ASTC_2D_4X4: |
| @@ -222,33 +220,28 @@ static bool IsFormatBCn(PixelFormat format) { | |||
| 222 | } | 220 | } |
| 223 | 221 | ||
| 224 | template <bool morton_to_gl, PixelFormat format> | 222 | template <bool morton_to_gl, PixelFormat format> |
| 225 | void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_buffer, | 223 | void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_buffer, VAddr addr) { |
| 226 | Tegra::GPUVAddr addr) { | ||
| 227 | constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; | 224 | constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; |
| 228 | constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); | 225 | constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); |
| 229 | auto& gpu = Core::System::GetInstance().GPU(); | ||
| 230 | 226 | ||
| 231 | if (morton_to_gl) { | 227 | if (morton_to_gl) { |
| 232 | // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual | 228 | // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual |
| 233 | // pixel values. | 229 | // pixel values. |
| 234 | const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; | 230 | const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; |
| 235 | const std::vector<u8> data = | 231 | const std::vector<u8> data = Tegra::Texture::UnswizzleTexture( |
| 236 | Tegra::Texture::UnswizzleTexture(*gpu.MemoryManager().GpuToCpuAddress(addr), tile_size, | 232 | addr, tile_size, bytes_per_pixel, stride, height, block_height); |
| 237 | bytes_per_pixel, stride, height, block_height); | ||
| 238 | const size_t size_to_copy{std::min(gl_buffer.size(), data.size())}; | 233 | const size_t size_to_copy{std::min(gl_buffer.size(), data.size())}; |
| 239 | gl_buffer.assign(data.begin(), data.begin() + size_to_copy); | 234 | gl_buffer.assign(data.begin(), data.begin() + size_to_copy); |
| 240 | } else { | 235 | } else { |
| 241 | // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should | 236 | // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should |
| 242 | // check the configuration for this and perform more generic un/swizzle | 237 | // check the configuration for this and perform more generic un/swizzle |
| 243 | LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); | 238 | LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); |
| 244 | VideoCore::MortonCopyPixels128( | 239 | VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, |
| 245 | stride, height, bytes_per_pixel, gl_bytes_per_pixel, | 240 | Memory::GetPointer(addr), gl_buffer.data(), morton_to_gl); |
| 246 | Memory::GetPointer(*gpu.MemoryManager().GpuToCpuAddress(addr)), gl_buffer.data(), | ||
| 247 | morton_to_gl); | ||
| 248 | } | 241 | } |
| 249 | } | 242 | } |
| 250 | 243 | ||
| 251 | static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), | 244 | static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr), |
| 252 | SurfaceParams::MaxPixelFormat> | 245 | SurfaceParams::MaxPixelFormat> |
| 253 | morton_to_gl_fns = { | 246 | morton_to_gl_fns = { |
| 254 | // clang-format off | 247 | // clang-format off |
| @@ -305,7 +298,7 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU | |||
| 305 | // clang-format on | 298 | // clang-format on |
| 306 | }; | 299 | }; |
| 307 | 300 | ||
| 308 | static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), | 301 | static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr), |
| 309 | SurfaceParams::MaxPixelFormat> | 302 | SurfaceParams::MaxPixelFormat> |
| 310 | gl_to_morton_fns = { | 303 | gl_to_morton_fns = { |
| 311 | // clang-format off | 304 | // clang-format off |
| @@ -542,7 +535,7 @@ MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64 | |||
| 542 | void CachedSurface::LoadGLBuffer() { | 535 | void CachedSurface::LoadGLBuffer() { |
| 543 | ASSERT(params.type != SurfaceType::Fill); | 536 | ASSERT(params.type != SurfaceType::Fill); |
| 544 | 537 | ||
| 545 | const u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr()); | 538 | const u8* const texture_src_data = Memory::GetPointer(params.addr); |
| 546 | 539 | ||
| 547 | ASSERT(texture_src_data); | 540 | ASSERT(texture_src_data); |
| 548 | 541 | ||
| @@ -567,7 +560,7 @@ void CachedSurface::LoadGLBuffer() { | |||
| 567 | 560 | ||
| 568 | MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); | 561 | MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); |
| 569 | void CachedSurface::FlushGLBuffer() { | 562 | void CachedSurface::FlushGLBuffer() { |
| 570 | u8* const dst_buffer = Memory::GetPointer(params.GetCpuAddr()); | 563 | u8* const dst_buffer = Memory::GetPointer(params.addr); |
| 571 | 564 | ||
| 572 | ASSERT(dst_buffer); | 565 | ASSERT(dst_buffer); |
| 573 | ASSERT(gl_buffer.size() == | 566 | ASSERT(gl_buffer.size() == |
| @@ -764,19 +757,10 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres | |||
| 764 | return {}; | 757 | return {}; |
| 765 | } | 758 | } |
| 766 | 759 | ||
| 767 | auto& gpu = Core::System::GetInstance().GPU(); | ||
| 768 | // Don't try to create any entries in the cache if the address of the texture is invalid. | ||
| 769 | if (gpu.MemoryManager().GpuToCpuAddress(params.addr) == boost::none) | ||
| 770 | return {}; | ||
| 771 | |||
| 772 | // Look up surface in the cache based on address | 760 | // Look up surface in the cache based on address |
| 773 | Surface surface{TryGet(params.addr)}; | 761 | Surface surface{TryGet(params.addr)}; |
| 774 | if (surface) { | 762 | if (surface) { |
| 775 | if (Settings::values.use_accurate_framebuffers) { | 763 | if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { |
| 776 | // If use_accurate_framebuffers is enabled, always load from memory | ||
| 777 | FlushSurface(surface); | ||
| 778 | Unregister(surface); | ||
| 779 | } else if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { | ||
| 780 | // Use the cached surface as-is | 764 | // Use the cached surface as-is |
| 781 | return surface; | 765 | return surface; |
| 782 | } else if (preserve_contents) { | 766 | } else if (preserve_contents) { |
| @@ -792,15 +776,9 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres | |||
| 792 | } | 776 | } |
| 793 | } | 777 | } |
| 794 | 778 | ||
| 795 | // Try to get a previously reserved surface | 779 | // No cached surface found - get a new one |
| 796 | surface = TryGetReservedSurface(params); | 780 | surface = GetUncachedSurface(params); |
| 797 | 781 | Register(surface); | |
| 798 | // No surface found - create a new one | ||
| 799 | if (!surface) { | ||
| 800 | surface = std::make_shared<CachedSurface>(params); | ||
| 801 | ReserveSurface(surface); | ||
| 802 | Register(surface); | ||
| 803 | } | ||
| 804 | 782 | ||
| 805 | // Only load surface from memory if we care about the contents | 783 | // Only load surface from memory if we care about the contents |
| 806 | if (preserve_contents) { | 784 | if (preserve_contents) { |
| @@ -810,13 +788,23 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres | |||
| 810 | return surface; | 788 | return surface; |
| 811 | } | 789 | } |
| 812 | 790 | ||
| 791 | Surface RasterizerCacheOpenGL::GetUncachedSurface(const SurfaceParams& params) { | ||
| 792 | Surface surface{TryGetReservedSurface(params)}; | ||
| 793 | if (!surface) { | ||
| 794 | // No reserved surface available, create a new one and reserve it | ||
| 795 | surface = std::make_shared<CachedSurface>(params); | ||
| 796 | ReserveSurface(surface); | ||
| 797 | } | ||
| 798 | return surface; | ||
| 799 | } | ||
| 800 | |||
| 813 | Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, | 801 | Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, |
| 814 | const SurfaceParams& new_params) { | 802 | const SurfaceParams& new_params) { |
| 815 | // Verify surface is compatible for blitting | 803 | // Verify surface is compatible for blitting |
| 816 | const auto& params{surface->GetSurfaceParams()}; | 804 | const auto& params{surface->GetSurfaceParams()}; |
| 817 | 805 | ||
| 818 | // Create a new surface with the new parameters, and blit the previous surface to it | 806 | // Get a new surface with the new parameters, and blit the previous surface to it |
| 819 | Surface new_surface{std::make_shared<CachedSurface>(new_params)}; | 807 | Surface new_surface{GetUncachedSurface(new_params)}; |
| 820 | 808 | ||
| 821 | // If format is unchanged, we can do a faster blit without reinterpreting pixel data | 809 | // If format is unchanged, we can do a faster blit without reinterpreting pixel data |
| 822 | if (params.pixel_format == new_params.pixel_format) { | 810 | if (params.pixel_format == new_params.pixel_format) { |
| @@ -826,92 +814,73 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, | |||
| 826 | return new_surface; | 814 | return new_surface; |
| 827 | } | 815 | } |
| 828 | 816 | ||
| 829 | auto source_format = GetFormatTuple(params.pixel_format, params.component_type); | 817 | // When using accurate framebuffers, always copy old data to new surface, regardless of format |
| 830 | auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type); | 818 | if (Settings::values.use_accurate_framebuffers) { |
| 819 | auto source_format = GetFormatTuple(params.pixel_format, params.component_type); | ||
| 820 | auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type); | ||
| 831 | 821 | ||
| 832 | size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes()); | 822 | size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes()); |
| 833 | 823 | ||
| 834 | // Use a Pixel Buffer Object to download the previous texture and then upload it to the new one | 824 | // Use a Pixel Buffer Object to download the previous texture and then upload it to the new |
| 835 | // using the new format. | 825 | // one using the new format. |
| 836 | OGLBuffer pbo; | 826 | OGLBuffer pbo; |
| 837 | pbo.Create(); | 827 | pbo.Create(); |
| 838 | 828 | ||
| 839 | glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo.handle); | 829 | glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo.handle); |
| 840 | glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB); | 830 | glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB); |
| 841 | if (source_format.compressed) { | 831 | if (source_format.compressed) { |
| 842 | glGetCompressedTextureImage(surface->Texture().handle, 0, | 832 | glGetCompressedTextureImage(surface->Texture().handle, 0, |
| 843 | static_cast<GLsizei>(params.SizeInBytes()), nullptr); | 833 | static_cast<GLsizei>(params.SizeInBytes()), nullptr); |
| 844 | } else { | 834 | } else { |
| 845 | glGetTextureImage(surface->Texture().handle, 0, source_format.format, source_format.type, | 835 | glGetTextureImage(surface->Texture().handle, 0, source_format.format, |
| 846 | static_cast<GLsizei>(params.SizeInBytes()), nullptr); | 836 | source_format.type, static_cast<GLsizei>(params.SizeInBytes()), |
| 847 | } | 837 | nullptr); |
| 848 | // If the new texture is bigger than the previous one, we need to fill in the rest with data | 838 | } |
| 849 | // from the CPU. | 839 | // If the new texture is bigger than the previous one, we need to fill in the rest with data |
| 850 | if (params.SizeInBytes() < new_params.SizeInBytes()) { | 840 | // from the CPU. |
| 851 | // Upload the rest of the memory. | 841 | if (params.SizeInBytes() < new_params.SizeInBytes()) { |
| 852 | if (new_params.is_tiled) { | 842 | // Upload the rest of the memory. |
| 853 | // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest of | 843 | if (new_params.is_tiled) { |
| 854 | // the data in this case. Games like Super Mario Odyssey seem to hit this case when | 844 | // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest |
| 855 | // drawing, it re-uses the memory of a previous texture as a bigger framebuffer but it | 845 | // of the data in this case. Games like Super Mario Odyssey seem to hit this case |
| 856 | // doesn't clear it beforehand, the texture is already full of zeros. | 846 | // when drawing, it re-uses the memory of a previous texture as a bigger framebuffer |
| 857 | LOG_CRITICAL(HW_GPU, "Trying to upload extra texture data from the CPU during " | 847 | // but it doesn't clear it beforehand, the texture is already full of zeros. |
| 858 | "reinterpretation but the texture is tiled."); | 848 | LOG_CRITICAL(HW_GPU, "Trying to upload extra texture data from the CPU during " |
| 849 | "reinterpretation but the texture is tiled."); | ||
| 850 | } | ||
| 851 | size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes(); | ||
| 852 | std::vector<u8> data(remaining_size); | ||
| 853 | Memory::ReadBlock(new_params.addr + params.SizeInBytes(), data.data(), data.size()); | ||
| 854 | glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size, | ||
| 855 | data.data()); | ||
| 859 | } | 856 | } |
| 860 | size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes(); | ||
| 861 | auto address = Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress( | ||
| 862 | new_params.addr + params.SizeInBytes()); | ||
| 863 | std::vector<u8> data(remaining_size); | ||
| 864 | Memory::ReadBlock(*address, data.data(), data.size()); | ||
| 865 | glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size, data.data()); | ||
| 866 | } | ||
| 867 | |||
| 868 | glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); | ||
| 869 | |||
| 870 | const auto& dest_rect{new_params.GetRect()}; | ||
| 871 | |||
| 872 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.handle); | ||
| 873 | if (dest_format.compressed) { | ||
| 874 | glCompressedTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, | ||
| 875 | static_cast<GLsizei>(dest_rect.GetWidth()), | ||
| 876 | static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format, | ||
| 877 | static_cast<GLsizei>(new_params.SizeInBytes()), nullptr); | ||
| 878 | } else { | ||
| 879 | glTextureSubImage2D(new_surface->Texture().handle, 0, 0, 0, | ||
| 880 | static_cast<GLsizei>(dest_rect.GetWidth()), | ||
| 881 | static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format, | ||
| 882 | dest_format.type, nullptr); | ||
| 883 | } | ||
| 884 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); | ||
| 885 | 857 | ||
| 886 | pbo.Release(); | 858 | glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); |
| 887 | 859 | ||
| 888 | return new_surface; | 860 | const auto& dest_rect{new_params.GetRect()}; |
| 889 | } | ||
| 890 | 861 | ||
| 891 | Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const { | 862 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.handle); |
| 892 | // Tries to find the GPU address of a framebuffer based on the CPU address. This is because | 863 | if (dest_format.compressed) { |
| 893 | // final output framebuffers are specified by CPU address, but internally our GPU cache uses | 864 | glCompressedTexSubImage2D( |
| 894 | // GPU addresses. We iterate through all cached framebuffers, and compare their starting CPU | 865 | GL_TEXTURE_2D, 0, 0, 0, static_cast<GLsizei>(dest_rect.GetWidth()), |
| 895 | // address to the one provided. This is obviously not great, and won't work if the | 866 | static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format, |
| 896 | // framebuffer overlaps surfaces. | 867 | static_cast<GLsizei>(new_params.SizeInBytes()), nullptr); |
| 897 | 868 | } else { | |
| 898 | std::vector<Surface> surfaces; | 869 | glTextureSubImage2D(new_surface->Texture().handle, 0, 0, 0, |
| 899 | for (const auto& surface : GetCache()) { | 870 | static_cast<GLsizei>(dest_rect.GetWidth()), |
| 900 | const auto& params = surface.second->GetSurfaceParams(); | 871 | static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format, |
| 901 | const VAddr surface_cpu_addr = params.GetCpuAddr(); | 872 | dest_format.type, nullptr); |
| 902 | if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + params.size_in_bytes)) { | ||
| 903 | ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported"); | ||
| 904 | surfaces.push_back(surface.second); | ||
| 905 | } | 873 | } |
| 906 | } | 874 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); |
| 907 | 875 | ||
| 908 | if (surfaces.empty()) { | 876 | pbo.Release(); |
| 909 | return {}; | ||
| 910 | } | 877 | } |
| 911 | 878 | ||
| 912 | ASSERT_MSG(surfaces.size() == 1, ">1 surface is unsupported"); | 879 | return new_surface; |
| 880 | } | ||
| 913 | 881 | ||
| 914 | return surfaces[0]; | 882 | Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const { |
| 883 | return TryGet(addr); | ||
| 915 | } | 884 | } |
| 916 | 885 | ||
| 917 | void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { | 886 | void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { |
| @@ -923,7 +892,6 @@ Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params | |||
| 923 | const auto& surface_reserve_key{SurfaceReserveKey::Create(params)}; | 892 | const auto& surface_reserve_key{SurfaceReserveKey::Create(params)}; |
| 924 | auto search{surface_reserve.find(surface_reserve_key)}; | 893 | auto search{surface_reserve.find(surface_reserve_key)}; |
| 925 | if (search != surface_reserve.end()) { | 894 | if (search != surface_reserve.end()) { |
| 926 | Register(search->second); | ||
| 927 | return search->second; | 895 | return search->second; |
| 928 | } | 896 | } |
| 929 | return {}; | 897 | return {}; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index ecdd8d8e5..aad75f200 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -638,9 +638,6 @@ struct SurfaceParams { | |||
| 638 | GetFormatBpp(pixel_format) / CHAR_BIT; | 638 | GetFormatBpp(pixel_format) / CHAR_BIT; |
| 639 | } | 639 | } |
| 640 | 640 | ||
| 641 | /// Returns the CPU virtual address for this surface | ||
| 642 | VAddr GetCpuAddr() const; | ||
| 643 | |||
| 644 | /// Creates SurfaceParams from a texture configuration | 641 | /// Creates SurfaceParams from a texture configuration |
| 645 | static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); | 642 | static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); |
| 646 | 643 | ||
| @@ -653,25 +650,13 @@ struct SurfaceParams { | |||
| 653 | Tegra::GPUVAddr zeta_address, | 650 | Tegra::GPUVAddr zeta_address, |
| 654 | Tegra::DepthFormat format); | 651 | Tegra::DepthFormat format); |
| 655 | 652 | ||
| 656 | bool operator==(const SurfaceParams& other) const { | ||
| 657 | return std::tie(addr, is_tiled, block_height, pixel_format, component_type, type, width, | ||
| 658 | height, unaligned_height, size_in_bytes) == | ||
| 659 | std::tie(other.addr, other.is_tiled, other.block_height, other.pixel_format, | ||
| 660 | other.component_type, other.type, other.width, other.height, | ||
| 661 | other.unaligned_height, other.size_in_bytes); | ||
| 662 | } | ||
| 663 | |||
| 664 | bool operator!=(const SurfaceParams& other) const { | ||
| 665 | return !operator==(other); | ||
| 666 | } | ||
| 667 | |||
| 668 | /// Checks if surfaces are compatible for caching | 653 | /// Checks if surfaces are compatible for caching |
| 669 | bool IsCompatibleSurface(const SurfaceParams& other) const { | 654 | bool IsCompatibleSurface(const SurfaceParams& other) const { |
| 670 | return std::tie(pixel_format, type, cache_width, cache_height) == | 655 | return std::tie(pixel_format, type, cache_width, cache_height) == |
| 671 | std::tie(other.pixel_format, other.type, other.cache_width, other.cache_height); | 656 | std::tie(other.pixel_format, other.type, other.cache_width, other.cache_height); |
| 672 | } | 657 | } |
| 673 | 658 | ||
| 674 | Tegra::GPUVAddr addr; | 659 | VAddr addr; |
| 675 | bool is_tiled; | 660 | bool is_tiled; |
| 676 | u32 block_height; | 661 | u32 block_height; |
| 677 | PixelFormat pixel_format; | 662 | PixelFormat pixel_format; |
| @@ -712,7 +697,7 @@ class CachedSurface final { | |||
| 712 | public: | 697 | public: |
| 713 | CachedSurface(const SurfaceParams& params); | 698 | CachedSurface(const SurfaceParams& params); |
| 714 | 699 | ||
| 715 | Tegra::GPUVAddr GetAddr() const { | 700 | VAddr GetAddr() const { |
| 716 | return params.addr; | 701 | return params.addr; |
| 717 | } | 702 | } |
| 718 | 703 | ||
| @@ -763,13 +748,16 @@ public: | |||
| 763 | /// Flushes the surface to Switch memory | 748 | /// Flushes the surface to Switch memory |
| 764 | void FlushSurface(const Surface& surface); | 749 | void FlushSurface(const Surface& surface); |
| 765 | 750 | ||
| 766 | /// Tries to find a framebuffer GPU address based on the CPU address | 751 | /// Tries to find a framebuffer using the provided CPU address |
| 767 | Surface TryFindFramebufferSurface(VAddr cpu_addr) const; | 752 | Surface TryFindFramebufferSurface(VAddr addr) const; |
| 768 | 753 | ||
| 769 | private: | 754 | private: |
| 770 | void LoadSurface(const Surface& surface); | 755 | void LoadSurface(const Surface& surface); |
| 771 | Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); | 756 | Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); |
| 772 | 757 | ||
| 758 | /// Gets an uncached surface, creating it if need be | ||
| 759 | Surface GetUncachedSurface(const SurfaceParams& params); | ||
| 760 | |||
| 773 | /// Recreates a surface with new parameters | 761 | /// Recreates a surface with new parameters |
| 774 | Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params); | 762 | Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params); |
| 775 | 763 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 326a901ba..ac9adfd83 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -12,21 +12,17 @@ | |||
| 12 | namespace OpenGL { | 12 | namespace OpenGL { |
| 13 | 13 | ||
| 14 | /// Gets the address for the specified shader stage program | 14 | /// Gets the address for the specified shader stage program |
| 15 | static Tegra::GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) { | 15 | static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { |
| 16 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | 16 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); |
| 17 | auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)]; | 17 | auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)]; |
| 18 | 18 | return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() + | |
| 19 | return gpu.regs.code_address.CodeAddress() + shader_config.offset; | 19 | shader_config.offset); |
| 20 | } | 20 | } |
| 21 | 21 | ||
| 22 | /// Gets the shader program code from memory for the specified address | 22 | /// Gets the shader program code from memory for the specified address |
| 23 | static GLShader::ProgramCode GetShaderCode(Tegra::GPUVAddr addr) { | 23 | static GLShader::ProgramCode GetShaderCode(VAddr addr) { |
| 24 | auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | ||
| 25 | |||
| 26 | GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH); | 24 | GLShader::ProgramCode program_code(GLShader::MAX_PROGRAM_CODE_LENGTH); |
| 27 | const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(addr)}; | 25 | Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); |
| 28 | Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64)); | ||
| 29 | |||
| 30 | return program_code; | 26 | return program_code; |
| 31 | } | 27 | } |
| 32 | 28 | ||
| @@ -55,7 +51,7 @@ static void SetShaderUniformBlockBindings(GLuint shader) { | |||
| 55 | sizeof(GLShader::MaxwellUniformData)); | 51 | sizeof(GLShader::MaxwellUniformData)); |
| 56 | } | 52 | } |
| 57 | 53 | ||
| 58 | CachedShader::CachedShader(Tegra::GPUVAddr addr, Maxwell::ShaderProgram program_type) | 54 | CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) |
| 59 | : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} { | 55 | : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} { |
| 60 | 56 | ||
| 61 | GLShader::ProgramResult program_result; | 57 | GLShader::ProgramResult program_result; |
| @@ -113,7 +109,7 @@ GLint CachedShader::GetUniformLocation(const std::string& name) { | |||
| 113 | } | 109 | } |
| 114 | 110 | ||
| 115 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | 111 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { |
| 116 | const Tegra::GPUVAddr program_addr{GetShaderAddress(program)}; | 112 | const VAddr program_addr{GetShaderAddress(program)}; |
| 117 | 113 | ||
| 118 | // Look up shader in the cache based on address | 114 | // Look up shader in the cache based on address |
| 119 | Shader shader{TryGet(program_addr)}; | 115 | Shader shader{TryGet(program_addr)}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 658f9e994..759987604 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -8,7 +8,6 @@ | |||
| 8 | #include <unordered_map> | 8 | #include <unordered_map> |
| 9 | 9 | ||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/memory_manager.h" | ||
| 12 | #include "video_core/rasterizer_cache.h" | 11 | #include "video_core/rasterizer_cache.h" |
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 14 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 13 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| @@ -21,10 +20,10 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; | |||
| 21 | 20 | ||
| 22 | class CachedShader final { | 21 | class CachedShader final { |
| 23 | public: | 22 | public: |
| 24 | CachedShader(Tegra::GPUVAddr addr, Maxwell::ShaderProgram program_type); | 23 | CachedShader(VAddr addr, Maxwell::ShaderProgram program_type); |
| 25 | 24 | ||
| 26 | /// Gets the address of the shader in guest memory, required for cache management | 25 | /// Gets the address of the shader in guest memory, required for cache management |
| 27 | Tegra::GPUVAddr GetAddr() const { | 26 | VAddr GetAddr() const { |
| 28 | return addr; | 27 | return addr; |
| 29 | } | 28 | } |
| 30 | 29 | ||
| @@ -50,7 +49,7 @@ public: | |||
| 50 | GLint GetUniformLocation(const std::string& name); | 49 | GLint GetUniformLocation(const std::string& name); |
| 51 | 50 | ||
| 52 | private: | 51 | private: |
| 53 | Tegra::GPUVAddr addr; | 52 | VAddr addr; |
| 54 | Maxwell::ShaderProgram program_type; | 53 | Maxwell::ShaderProgram program_type; |
| 55 | GLShader::ShaderSetup setup; | 54 | GLShader::ShaderSetup setup; |
| 56 | GLShader::ShaderEntries entries; | 55 | GLShader::ShaderEntries entries; |