summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common/file_util.cpp22
-rw-r--r--src/common/thread.cpp9
-rw-r--r--src/core/file_sys/romfs.cpp3
-rw-r--r--src/core/memory.cpp127
-rw-r--r--src/core/memory.h78
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/buffer_cache/buffer_block.h42
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h145
-rw-r--r--src/video_core/buffer_cache/map_interval.h12
-rw-r--r--src/video_core/engines/shader_bytecode.h8
-rw-r--r--src/video_core/engines/shader_header.h55
-rw-r--r--src/video_core/gpu.h6
-rw-r--r--src/video_core/gpu_asynch.cpp6
-rw-r--r--src/video_core/gpu_asynch.h6
-rw-r--r--src/video_core/gpu_synch.cpp6
-rw-r--r--src/video_core/gpu_synch.h6
-rw-r--r--src/video_core/gpu_thread.cpp6
-rw-r--r--src/video_core/gpu_thread.h18
-rw-r--r--src/video_core/memory_manager.cpp93
-rw-r--r--src/video_core/memory_manager.h5
-rw-r--r--src/video_core/query_cache.h37
-rw-r--r--src/video_core/rasterizer_cache.h44
-rw-r--r--src/video_core/rasterizer_interface.h6
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp8
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h4
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp26
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp20
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp46
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h8
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp34
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp211
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp45
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp38
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h7
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp15
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp17
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp34
-rw-r--r--src/video_core/shader/decode/image.cpp360
-rw-r--r--src/video_core/shader/decode/other.cpp48
-rw-r--r--src/video_core/shader/shader_ir.cpp3
-rw-r--r--src/video_core/shader/shader_ir.h4
-rw-r--r--src/video_core/surface.h97
-rw-r--r--src/video_core/texture_cache/surface_base.cpp81
-rw-r--r--src/video_core/texture_cache/surface_base.h49
-rw-r--r--src/video_core/texture_cache/surface_params.cpp34
-rw-r--r--src/video_core/texture_cache/surface_params.h36
-rw-r--r--src/video_core/texture_cache/texture_cache.h127
-rw-r--r--src/video_core/textures/texture.cpp80
-rw-r--r--src/video_core/textures/texture.h31
-rw-r--r--src/yuzu/about_dialog.cpp9
-rw-r--r--src/yuzu/bootmanager.cpp1
-rw-r--r--src/yuzu/configuration/configure_input_player.ui4
-rw-r--r--src/yuzu/main.cpp8
59 files changed, 1391 insertions, 870 deletions
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp
index 41167f57a..35eee0096 100644
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <array> 5#include <array>
6#include <limits>
6#include <memory> 7#include <memory>
7#include <sstream> 8#include <sstream>
8#include <unordered_map> 9#include <unordered_map>
@@ -530,11 +531,11 @@ void CopyDir(const std::string& source_path, const std::string& dest_path) {
530std::optional<std::string> GetCurrentDir() { 531std::optional<std::string> GetCurrentDir() {
531// Get the current working directory (getcwd uses malloc) 532// Get the current working directory (getcwd uses malloc)
532#ifdef _WIN32 533#ifdef _WIN32
533 wchar_t* dir; 534 wchar_t* dir = _wgetcwd(nullptr, 0);
534 if (!(dir = _wgetcwd(nullptr, 0))) { 535 if (!dir) {
535#else 536#else
536 char* dir; 537 char* dir = getcwd(nullptr, 0);
537 if (!(dir = getcwd(nullptr, 0))) { 538 if (!dir) {
538#endif 539#endif
539 LOG_ERROR(Common_Filesystem, "GetCurrentDirectory failed: {}", GetLastErrorMsg()); 540 LOG_ERROR(Common_Filesystem, "GetCurrentDirectory failed: {}", GetLastErrorMsg());
540 return {}; 541 return {};
@@ -918,19 +919,22 @@ void IOFile::Swap(IOFile& other) noexcept {
918 919
919bool IOFile::Open(const std::string& filename, const char openmode[], int flags) { 920bool IOFile::Open(const std::string& filename, const char openmode[], int flags) {
920 Close(); 921 Close();
922 bool m_good;
921#ifdef _WIN32 923#ifdef _WIN32
922 if (flags != 0) { 924 if (flags != 0) {
923 m_file = _wfsopen(Common::UTF8ToUTF16W(filename).c_str(), 925 m_file = _wfsopen(Common::UTF8ToUTF16W(filename).c_str(),
924 Common::UTF8ToUTF16W(openmode).c_str(), flags); 926 Common::UTF8ToUTF16W(openmode).c_str(), flags);
927 m_good = m_file != nullptr;
925 } else { 928 } else {
926 _wfopen_s(&m_file, Common::UTF8ToUTF16W(filename).c_str(), 929 m_good = _wfopen_s(&m_file, Common::UTF8ToUTF16W(filename).c_str(),
927 Common::UTF8ToUTF16W(openmode).c_str()); 930 Common::UTF8ToUTF16W(openmode).c_str()) == 0;
928 } 931 }
929#else 932#else
930 m_file = fopen(filename.c_str(), openmode); 933 m_file = std::fopen(filename.c_str(), openmode);
934 m_good = m_file != nullptr;
931#endif 935#endif
932 936
933 return IsOpen(); 937 return m_good;
934} 938}
935 939
936bool IOFile::Close() { 940bool IOFile::Close() {
@@ -956,7 +960,7 @@ u64 IOFile::Tell() const {
956 if (IsOpen()) 960 if (IsOpen())
957 return ftello(m_file); 961 return ftello(m_file);
958 962
959 return -1; 963 return std::numeric_limits<u64>::max();
960} 964}
961 965
962bool IOFile::Flush() { 966bool IOFile::Flush() {
diff --git a/src/common/thread.cpp b/src/common/thread.cpp
index fe7a420cc..0cd2d10bf 100644
--- a/src/common/thread.cpp
+++ b/src/common/thread.cpp
@@ -28,11 +28,8 @@ namespace Common {
28#ifdef _MSC_VER 28#ifdef _MSC_VER
29 29
30// Sets the debugger-visible name of the current thread. 30// Sets the debugger-visible name of the current thread.
31// Uses undocumented (actually, it is now documented) trick. 31// Uses trick documented in:
32// http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vsdebug/html/vxtsksettingthreadname.asp 32// https://docs.microsoft.com/en-us/visualstudio/debugger/how-to-set-a-thread-name-in-native-code
33
34// This is implemented much nicer in upcoming msvc++, see:
35// http://msdn.microsoft.com/en-us/library/xcb2z8hs(VS.100).aspx
36void SetCurrentThreadName(const char* name) { 33void SetCurrentThreadName(const char* name) {
37 static const DWORD MS_VC_EXCEPTION = 0x406D1388; 34 static const DWORD MS_VC_EXCEPTION = 0x406D1388;
38 35
@@ -47,7 +44,7 @@ void SetCurrentThreadName(const char* name) {
47 44
48 info.dwType = 0x1000; 45 info.dwType = 0x1000;
49 info.szName = name; 46 info.szName = name;
50 info.dwThreadID = -1; // dwThreadID; 47 info.dwThreadID = std::numeric_limits<DWORD>::max();
51 info.dwFlags = 0; 48 info.dwFlags = 0;
52 49
53 __try { 50 __try {
diff --git a/src/core/file_sys/romfs.cpp b/src/core/file_sys/romfs.cpp
index c909d1ce4..120032134 100644
--- a/src/core/file_sys/romfs.cpp
+++ b/src/core/file_sys/romfs.cpp
@@ -5,6 +5,7 @@
5#include <memory> 5#include <memory>
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "common/string_util.h"
8#include "common/swap.h" 9#include "common/swap.h"
9#include "core/file_sys/fsmitm_romfsbuild.h" 10#include "core/file_sys/fsmitm_romfsbuild.h"
10#include "core/file_sys/romfs.h" 11#include "core/file_sys/romfs.h"
@@ -126,7 +127,7 @@ VirtualDir ExtractRomFS(VirtualFile file, RomFSExtractionType type) {
126 return out->GetSubdirectories().front(); 127 return out->GetSubdirectories().front();
127 128
128 while (out->GetSubdirectories().size() == 1 && out->GetFiles().empty()) { 129 while (out->GetSubdirectories().size() == 1 && out->GetFiles().empty()) {
129 if (out->GetSubdirectories().front()->GetName() == "data" && 130 if (Common::ToLower(out->GetSubdirectories().front()->GetName()) == "data" &&
130 type == RomFSExtractionType::Truncated) 131 type == RomFSExtractionType::Truncated)
131 break; 132 break;
132 out = out->GetSubdirectories().front(); 133 out = out->GetSubdirectories().front();
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index f0888327f..6061d37ae 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -242,7 +242,52 @@ struct Memory::Impl {
242 } 242 }
243 case Common::PageType::RasterizerCachedMemory: { 243 case Common::PageType::RasterizerCachedMemory: {
244 const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); 244 const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
245 system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount); 245 system.GPU().FlushRegion(current_vaddr, copy_amount);
246 std::memcpy(dest_buffer, host_ptr, copy_amount);
247 break;
248 }
249 default:
250 UNREACHABLE();
251 }
252
253 page_index++;
254 page_offset = 0;
255 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
256 remaining_size -= copy_amount;
257 }
258 }
259
260 void ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer,
261 const std::size_t size) {
262 const auto& page_table = process.VMManager().page_table;
263
264 std::size_t remaining_size = size;
265 std::size_t page_index = src_addr >> PAGE_BITS;
266 std::size_t page_offset = src_addr & PAGE_MASK;
267
268 while (remaining_size > 0) {
269 const std::size_t copy_amount =
270 std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
271 const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
272
273 switch (page_table.attributes[page_index]) {
274 case Common::PageType::Unmapped: {
275 LOG_ERROR(HW_Memory,
276 "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
277 current_vaddr, src_addr, size);
278 std::memset(dest_buffer, 0, copy_amount);
279 break;
280 }
281 case Common::PageType::Memory: {
282 DEBUG_ASSERT(page_table.pointers[page_index]);
283
284 const u8* const src_ptr =
285 page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
286 std::memcpy(dest_buffer, src_ptr, copy_amount);
287 break;
288 }
289 case Common::PageType::RasterizerCachedMemory: {
290 const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
246 std::memcpy(dest_buffer, host_ptr, copy_amount); 291 std::memcpy(dest_buffer, host_ptr, copy_amount);
247 break; 292 break;
248 } 293 }
@@ -261,6 +306,10 @@ struct Memory::Impl {
261 ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size); 306 ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size);
262 } 307 }
263 308
309 void ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
310 ReadBlockUnsafe(*system.CurrentProcess(), src_addr, dest_buffer, size);
311 }
312
264 void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer, 313 void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer,
265 const std::size_t size) { 314 const std::size_t size) {
266 const auto& page_table = process.VMManager().page_table; 315 const auto& page_table = process.VMManager().page_table;
@@ -290,7 +339,50 @@ struct Memory::Impl {
290 } 339 }
291 case Common::PageType::RasterizerCachedMemory: { 340 case Common::PageType::RasterizerCachedMemory: {
292 u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); 341 u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
293 system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount); 342 system.GPU().InvalidateRegion(current_vaddr, copy_amount);
343 std::memcpy(host_ptr, src_buffer, copy_amount);
344 break;
345 }
346 default:
347 UNREACHABLE();
348 }
349
350 page_index++;
351 page_offset = 0;
352 src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
353 remaining_size -= copy_amount;
354 }
355 }
356
357 void WriteBlockUnsafe(const Kernel::Process& process, const VAddr dest_addr,
358 const void* src_buffer, const std::size_t size) {
359 const auto& page_table = process.VMManager().page_table;
360 std::size_t remaining_size = size;
361 std::size_t page_index = dest_addr >> PAGE_BITS;
362 std::size_t page_offset = dest_addr & PAGE_MASK;
363
364 while (remaining_size > 0) {
365 const std::size_t copy_amount =
366 std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
367 const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
368
369 switch (page_table.attributes[page_index]) {
370 case Common::PageType::Unmapped: {
371 LOG_ERROR(HW_Memory,
372 "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
373 current_vaddr, dest_addr, size);
374 break;
375 }
376 case Common::PageType::Memory: {
377 DEBUG_ASSERT(page_table.pointers[page_index]);
378
379 u8* const dest_ptr =
380 page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
381 std::memcpy(dest_ptr, src_buffer, copy_amount);
382 break;
383 }
384 case Common::PageType::RasterizerCachedMemory: {
385 u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
294 std::memcpy(host_ptr, src_buffer, copy_amount); 386 std::memcpy(host_ptr, src_buffer, copy_amount);
295 break; 387 break;
296 } 388 }
@@ -309,6 +401,10 @@ struct Memory::Impl {
309 WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size); 401 WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size);
310 } 402 }
311 403
404 void WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
405 WriteBlockUnsafe(*system.CurrentProcess(), dest_addr, src_buffer, size);
406 }
407
312 void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std::size_t size) { 408 void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std::size_t size) {
313 const auto& page_table = process.VMManager().page_table; 409 const auto& page_table = process.VMManager().page_table;
314 std::size_t remaining_size = size; 410 std::size_t remaining_size = size;
@@ -337,7 +433,7 @@ struct Memory::Impl {
337 } 433 }
338 case Common::PageType::RasterizerCachedMemory: { 434 case Common::PageType::RasterizerCachedMemory: {
339 u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); 435 u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
340 system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount); 436 system.GPU().InvalidateRegion(current_vaddr, copy_amount);
341 std::memset(host_ptr, 0, copy_amount); 437 std::memset(host_ptr, 0, copy_amount);
342 break; 438 break;
343 } 439 }
@@ -384,7 +480,7 @@ struct Memory::Impl {
384 } 480 }
385 case Common::PageType::RasterizerCachedMemory: { 481 case Common::PageType::RasterizerCachedMemory: {
386 const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); 482 const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
387 system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount); 483 system.GPU().FlushRegion(current_vaddr, copy_amount);
388 WriteBlock(process, dest_addr, host_ptr, copy_amount); 484 WriteBlock(process, dest_addr, host_ptr, copy_amount);
389 break; 485 break;
390 } 486 }
@@ -545,7 +641,7 @@ struct Memory::Impl {
545 break; 641 break;
546 case Common::PageType::RasterizerCachedMemory: { 642 case Common::PageType::RasterizerCachedMemory: {
547 const u8* const host_ptr = GetPointerFromVMA(vaddr); 643 const u8* const host_ptr = GetPointerFromVMA(vaddr);
548 system.GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T)); 644 system.GPU().FlushRegion(vaddr, sizeof(T));
549 T value; 645 T value;
550 std::memcpy(&value, host_ptr, sizeof(T)); 646 std::memcpy(&value, host_ptr, sizeof(T));
551 return value; 647 return value;
@@ -587,7 +683,7 @@ struct Memory::Impl {
587 break; 683 break;
588 case Common::PageType::RasterizerCachedMemory: { 684 case Common::PageType::RasterizerCachedMemory: {
589 u8* const host_ptr{GetPointerFromVMA(vaddr)}; 685 u8* const host_ptr{GetPointerFromVMA(vaddr)};
590 system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T)); 686 system.GPU().InvalidateRegion(vaddr, sizeof(T));
591 std::memcpy(host_ptr, &data, sizeof(T)); 687 std::memcpy(host_ptr, &data, sizeof(T));
592 break; 688 break;
593 } 689 }
@@ -696,6 +792,15 @@ void Memory::ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_
696 impl->ReadBlock(src_addr, dest_buffer, size); 792 impl->ReadBlock(src_addr, dest_buffer, size);
697} 793}
698 794
795void Memory::ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr,
796 void* dest_buffer, const std::size_t size) {
797 impl->ReadBlockUnsafe(process, src_addr, dest_buffer, size);
798}
799
800void Memory::ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
801 impl->ReadBlockUnsafe(src_addr, dest_buffer, size);
802}
803
699void Memory::WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer, 804void Memory::WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
700 std::size_t size) { 805 std::size_t size) {
701 impl->WriteBlock(process, dest_addr, src_buffer, size); 806 impl->WriteBlock(process, dest_addr, src_buffer, size);
@@ -705,6 +810,16 @@ void Memory::WriteBlock(const VAddr dest_addr, const void* src_buffer, const std
705 impl->WriteBlock(dest_addr, src_buffer, size); 810 impl->WriteBlock(dest_addr, src_buffer, size);
706} 811}
707 812
813void Memory::WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr,
814 const void* src_buffer, std::size_t size) {
815 impl->WriteBlockUnsafe(process, dest_addr, src_buffer, size);
816}
817
818void Memory::WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer,
819 const std::size_t size) {
820 impl->WriteBlockUnsafe(dest_addr, src_buffer, size);
821}
822
708void Memory::ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size) { 823void Memory::ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size) {
709 impl->ZeroBlock(process, dest_addr, size); 824 impl->ZeroBlock(process, dest_addr, size);
710} 825}
diff --git a/src/core/memory.h b/src/core/memory.h
index 8913a9da4..b92d678a4 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -295,6 +295,27 @@ public:
295 std::size_t size); 295 std::size_t size);
296 296
297 /** 297 /**
298 * Reads a contiguous block of bytes from a specified process' address space.
299 * This unsafe version does not trigger GPU flushing.
300 *
301 * @param process The process to read the data from.
302 * @param src_addr The virtual address to begin reading from.
303 * @param dest_buffer The buffer to place the read bytes into.
304 * @param size The amount of data to read, in bytes.
305 *
306 * @note If a size of 0 is specified, then this function reads nothing and
307 * no attempts to access memory are made at all.
308 *
309 * @pre dest_buffer must be at least size bytes in length, otherwise a
310 * buffer overrun will occur.
311 *
312 * @post The range [dest_buffer, size) contains the read bytes from the
313 * process' address space.
314 */
315 void ReadBlockUnsafe(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
316 std::size_t size);
317
318 /**
298 * Reads a contiguous block of bytes from the current process' address space. 319 * Reads a contiguous block of bytes from the current process' address space.
299 * 320 *
300 * @param src_addr The virtual address to begin reading from. 321 * @param src_addr The virtual address to begin reading from.
@@ -313,6 +334,25 @@ public:
313 void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size); 334 void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size);
314 335
315 /** 336 /**
337 * Reads a contiguous block of bytes from the current process' address space.
338 * This unsafe version does not trigger GPU flushing.
339 *
340 * @param src_addr The virtual address to begin reading from.
341 * @param dest_buffer The buffer to place the read bytes into.
342 * @param size The amount of data to read, in bytes.
343 *
344 * @note If a size of 0 is specified, then this function reads nothing and
345 * no attempts to access memory are made at all.
346 *
347 * @pre dest_buffer must be at least size bytes in length, otherwise a
348 * buffer overrun will occur.
349 *
350 * @post The range [dest_buffer, size) contains the read bytes from the
351 * current process' address space.
352 */
353 void ReadBlockUnsafe(VAddr src_addr, void* dest_buffer, std::size_t size);
354
355 /**
316 * Writes a range of bytes into a given process' address space at the specified 356 * Writes a range of bytes into a given process' address space at the specified
317 * virtual address. 357 * virtual address.
318 * 358 *
@@ -336,6 +376,26 @@ public:
336 std::size_t size); 376 std::size_t size);
337 377
338 /** 378 /**
379 * Writes a range of bytes into a given process' address space at the specified
380 * virtual address.
381 * This unsafe version does not invalidate GPU Memory.
382 *
383 * @param process The process to write data into the address space of.
384 * @param dest_addr The destination virtual address to begin writing the data at.
385 * @param src_buffer The data to write into the process' address space.
386 * @param size The size of the data to write, in bytes.
387 *
388 * @post The address range [dest_addr, size) in the process' address space
389 * contains the data that was within src_buffer.
390 *
391 * @post If an attempt is made to write into an unmapped region of memory, the writes
392 * will be ignored and an error will be logged.
393 *
394 */
395 void WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
396 std::size_t size);
397
398 /**
339 * Writes a range of bytes into the current process' address space at the specified 399 * Writes a range of bytes into the current process' address space at the specified
340 * virtual address. 400 * virtual address.
341 * 401 *
@@ -357,6 +417,24 @@ public:
357 void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size); 417 void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size);
358 418
359 /** 419 /**
420 * Writes a range of bytes into the current process' address space at the specified
421 * virtual address.
422 * This unsafe version does not invalidate GPU Memory.
423 *
424 * @param dest_addr The destination virtual address to begin writing the data at.
425 * @param src_buffer The data to write into the current process' address space.
426 * @param size The size of the data to write, in bytes.
427 *
428 * @post The address range [dest_addr, size) in the current process' address space
429 * contains the data that was within src_buffer.
430 *
431 * @post If an attempt is made to write into an unmapped region of memory, the writes
432 * will be ignored and an error will be logged.
433 *
434 */
435 void WriteBlockUnsafe(VAddr dest_addr, const void* src_buffer, std::size_t size);
436
437 /**
360 * Fills the specified address range within a process' address space with zeroes. 438 * Fills the specified address range within a process' address space with zeroes.
361 * 439 *
362 * @param process The process that will have a portion of its memory zeroed out. 440 * @param process The process that will have a portion of its memory zeroed out.
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index effe76a63..f7febd6a2 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -148,6 +148,7 @@ add_library(video_core STATIC
148 textures/convert.h 148 textures/convert.h
149 textures/decoders.cpp 149 textures/decoders.cpp
150 textures/decoders.h 150 textures/decoders.h
151 textures/texture.cpp
151 textures/texture.h 152 textures/texture.h
152 video_core.cpp 153 video_core.cpp
153 video_core.h 154 video_core.h
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
index 4b9193182..e35ee0b67 100644
--- a/src/video_core/buffer_cache/buffer_block.h
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -15,37 +15,29 @@ namespace VideoCommon {
15 15
16class BufferBlock { 16class BufferBlock {
17public: 17public:
18 bool Overlaps(const CacheAddr start, const CacheAddr end) const { 18 bool Overlaps(const VAddr start, const VAddr end) const {
19 return (cache_addr < end) && (cache_addr_end > start); 19 return (cpu_addr < end) && (cpu_addr_end > start);
20 } 20 }
21 21
22 bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { 22 bool IsInside(const VAddr other_start, const VAddr other_end) const {
23 return cache_addr <= other_start && other_end <= cache_addr_end; 23 return cpu_addr <= other_start && other_end <= cpu_addr_end;
24 } 24 }
25 25
26 u8* GetWritableHostPtr() const { 26 std::size_t GetOffset(const VAddr in_addr) {
27 return FromCacheAddr(cache_addr); 27 return static_cast<std::size_t>(in_addr - cpu_addr);
28 } 28 }
29 29
30 u8* GetWritableHostPtr(std::size_t offset) const { 30 VAddr GetCpuAddr() const {
31 return FromCacheAddr(cache_addr + offset); 31 return cpu_addr;
32 } 32 }
33 33
34 std::size_t GetOffset(const CacheAddr in_addr) { 34 VAddr GetCpuAddrEnd() const {
35 return static_cast<std::size_t>(in_addr - cache_addr); 35 return cpu_addr_end;
36 } 36 }
37 37
38 CacheAddr GetCacheAddr() const { 38 void SetCpuAddr(const VAddr new_addr) {
39 return cache_addr; 39 cpu_addr = new_addr;
40 } 40 cpu_addr_end = new_addr + size;
41
42 CacheAddr GetCacheAddrEnd() const {
43 return cache_addr_end;
44 }
45
46 void SetCacheAddr(const CacheAddr new_addr) {
47 cache_addr = new_addr;
48 cache_addr_end = new_addr + size;
49 } 41 }
50 42
51 std::size_t GetSize() const { 43 std::size_t GetSize() const {
@@ -61,14 +53,14 @@ public:
61 } 53 }
62 54
63protected: 55protected:
64 explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} { 56 explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
65 SetCacheAddr(cache_addr); 57 SetCpuAddr(cpu_addr);
66 } 58 }
67 ~BufferBlock() = default; 59 ~BufferBlock() = default;
68 60
69private: 61private:
70 CacheAddr cache_addr{}; 62 VAddr cpu_addr{};
71 CacheAddr cache_addr_end{}; 63 VAddr cpu_addr_end{};
72 std::size_t size{}; 64 std::size_t size{};
73 u64 epoch{}; 65 u64 epoch{};
74}; 66};
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 186aca61d..b57c0d4d4 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -19,6 +19,7 @@
19#include "common/alignment.h" 19#include "common/alignment.h"
20#include "common/common_types.h" 20#include "common/common_types.h"
21#include "core/core.h" 21#include "core/core.h"
22#include "core/memory.h"
22#include "video_core/buffer_cache/buffer_block.h" 23#include "video_core/buffer_cache/buffer_block.h"
23#include "video_core/buffer_cache/map_interval.h" 24#include "video_core/buffer_cache/map_interval.h"
24#include "video_core/memory_manager.h" 25#include "video_core/memory_manager.h"
@@ -37,28 +38,45 @@ public:
37 bool is_written = false, bool use_fast_cbuf = false) { 38 bool is_written = false, bool use_fast_cbuf = false) {
38 std::lock_guard lock{mutex}; 39 std::lock_guard lock{mutex};
39 40
40 auto& memory_manager = system.GPU().MemoryManager(); 41 const std::optional<VAddr> cpu_addr_opt =
41 const auto host_ptr = memory_manager.GetPointer(gpu_addr); 42 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
42 if (!host_ptr) { 43
44 if (!cpu_addr_opt) {
43 return {GetEmptyBuffer(size), 0}; 45 return {GetEmptyBuffer(size), 0};
44 } 46 }
45 const auto cache_addr = ToCacheAddr(host_ptr); 47
48 VAddr cpu_addr = *cpu_addr_opt;
46 49
47 // Cache management is a big overhead, so only cache entries with a given size. 50 // Cache management is a big overhead, so only cache entries with a given size.
48 // TODO: Figure out which size is the best for given games. 51 // TODO: Figure out which size is the best for given games.
49 constexpr std::size_t max_stream_size = 0x800; 52 constexpr std::size_t max_stream_size = 0x800;
50 if (use_fast_cbuf || size < max_stream_size) { 53 if (use_fast_cbuf || size < max_stream_size) {
51 if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) { 54 if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
55 auto& memory_manager = system.GPU().MemoryManager();
52 if (use_fast_cbuf) { 56 if (use_fast_cbuf) {
53 return ConstBufferUpload(host_ptr, size); 57 if (memory_manager.IsGranularRange(gpu_addr, size)) {
58 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
59 return ConstBufferUpload(host_ptr, size);
60 } else {
61 staging_buffer.resize(size);
62 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
63 return ConstBufferUpload(staging_buffer.data(), size);
64 }
54 } else { 65 } else {
55 return StreamBufferUpload(host_ptr, size, alignment); 66 if (memory_manager.IsGranularRange(gpu_addr, size)) {
67 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
68 return StreamBufferUpload(host_ptr, size, alignment);
69 } else {
70 staging_buffer.resize(size);
71 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
72 return StreamBufferUpload(staging_buffer.data(), size, alignment);
73 }
56 } 74 }
57 } 75 }
58 } 76 }
59 77
60 auto block = GetBlock(cache_addr, size); 78 auto block = GetBlock(cpu_addr, size);
61 auto map = MapAddress(block, gpu_addr, cache_addr, size); 79 auto map = MapAddress(block, gpu_addr, cpu_addr, size);
62 if (is_written) { 80 if (is_written) {
63 map->MarkAsModified(true, GetModifiedTicks()); 81 map->MarkAsModified(true, GetModifiedTicks());
64 if (!map->IsWritten()) { 82 if (!map->IsWritten()) {
@@ -71,7 +89,7 @@ public:
71 } 89 }
72 } 90 }
73 91
74 const u64 offset = static_cast<u64>(block->GetOffset(cache_addr)); 92 const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr));
75 93
76 return {ToHandle(block), offset}; 94 return {ToHandle(block), offset};
77 } 95 }
@@ -112,7 +130,7 @@ public:
112 } 130 }
113 131
114 /// Write any cached resources overlapping the specified region back to memory 132 /// Write any cached resources overlapping the specified region back to memory
115 void FlushRegion(CacheAddr addr, std::size_t size) { 133 void FlushRegion(VAddr addr, std::size_t size) {
116 std::lock_guard lock{mutex}; 134 std::lock_guard lock{mutex};
117 135
118 std::vector<MapInterval> objects = GetMapsInRange(addr, size); 136 std::vector<MapInterval> objects = GetMapsInRange(addr, size);
@@ -127,7 +145,7 @@ public:
127 } 145 }
128 146
129 /// Mark the specified region as being invalidated 147 /// Mark the specified region as being invalidated
130 void InvalidateRegion(CacheAddr addr, u64 size) { 148 void InvalidateRegion(VAddr addr, u64 size) {
131 std::lock_guard lock{mutex}; 149 std::lock_guard lock{mutex};
132 150
133 std::vector<MapInterval> objects = GetMapsInRange(addr, size); 151 std::vector<MapInterval> objects = GetMapsInRange(addr, size);
@@ -152,7 +170,7 @@ protected:
152 170
153 virtual void WriteBarrier() = 0; 171 virtual void WriteBarrier() = 0;
154 172
155 virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0; 173 virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
156 174
157 virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, 175 virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
158 const u8* data) = 0; 176 const u8* data) = 0;
@@ -169,20 +187,17 @@ protected:
169 187
170 /// Register an object into the cache 188 /// Register an object into the cache
171 void Register(const MapInterval& new_map, bool inherit_written = false) { 189 void Register(const MapInterval& new_map, bool inherit_written = false) {
172 const CacheAddr cache_ptr = new_map->GetStart(); 190 const VAddr cpu_addr = new_map->GetStart();
173 const std::optional<VAddr> cpu_addr = 191 if (!cpu_addr) {
174 system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress());
175 if (!cache_ptr || !cpu_addr) {
176 LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", 192 LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
177 new_map->GetGpuAddress()); 193 new_map->GetGpuAddress());
178 return; 194 return;
179 } 195 }
180 const std::size_t size = new_map->GetEnd() - new_map->GetStart(); 196 const std::size_t size = new_map->GetEnd() - new_map->GetStart();
181 new_map->SetCpuAddress(*cpu_addr);
182 new_map->MarkAsRegistered(true); 197 new_map->MarkAsRegistered(true);
183 const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; 198 const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
184 mapped_addresses.insert({interval, new_map}); 199 mapped_addresses.insert({interval, new_map});
185 rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); 200 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
186 if (inherit_written) { 201 if (inherit_written) {
187 MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); 202 MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
188 new_map->MarkAsWritten(true); 203 new_map->MarkAsWritten(true);
@@ -192,7 +207,7 @@ protected:
192 /// Unregisters an object from the cache 207 /// Unregisters an object from the cache
193 void Unregister(MapInterval& map) { 208 void Unregister(MapInterval& map) {
194 const std::size_t size = map->GetEnd() - map->GetStart(); 209 const std::size_t size = map->GetEnd() - map->GetStart();
195 rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1); 210 rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
196 map->MarkAsRegistered(false); 211 map->MarkAsRegistered(false);
197 if (map->IsWritten()) { 212 if (map->IsWritten()) {
198 UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); 213 UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
@@ -202,32 +217,39 @@ protected:
202 } 217 }
203 218
204private: 219private:
205 MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) { 220 MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) {
206 return std::make_shared<MapIntervalBase>(start, end, gpu_addr); 221 return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
207 } 222 }
208 223
209 MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, 224 MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
210 const CacheAddr cache_addr, const std::size_t size) { 225 const std::size_t size) {
211 226
212 std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size); 227 std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
213 if (overlaps.empty()) { 228 if (overlaps.empty()) {
214 const CacheAddr cache_addr_end = cache_addr + size; 229 auto& memory_manager = system.GPU().MemoryManager();
215 MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr); 230 const VAddr cpu_addr_end = cpu_addr + size;
216 u8* host_ptr = FromCacheAddr(cache_addr); 231 MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr);
217 UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); 232 if (memory_manager.IsGranularRange(gpu_addr, size)) {
233 u8* host_ptr = memory_manager.GetPointer(gpu_addr);
234 UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
235 } else {
236 staging_buffer.resize(size);
237 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
238 UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
239 }
218 Register(new_map); 240 Register(new_map);
219 return new_map; 241 return new_map;
220 } 242 }
221 243
222 const CacheAddr cache_addr_end = cache_addr + size; 244 const VAddr cpu_addr_end = cpu_addr + size;
223 if (overlaps.size() == 1) { 245 if (overlaps.size() == 1) {
224 MapInterval& current_map = overlaps[0]; 246 MapInterval& current_map = overlaps[0];
225 if (current_map->IsInside(cache_addr, cache_addr_end)) { 247 if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
226 return current_map; 248 return current_map;
227 } 249 }
228 } 250 }
229 CacheAddr new_start = cache_addr; 251 VAddr new_start = cpu_addr;
230 CacheAddr new_end = cache_addr_end; 252 VAddr new_end = cpu_addr_end;
231 bool write_inheritance = false; 253 bool write_inheritance = false;
232 bool modified_inheritance = false; 254 bool modified_inheritance = false;
233 // Calculate new buffer parameters 255 // Calculate new buffer parameters
@@ -237,7 +259,7 @@ private:
237 write_inheritance |= overlap->IsWritten(); 259 write_inheritance |= overlap->IsWritten();
238 modified_inheritance |= overlap->IsModified(); 260 modified_inheritance |= overlap->IsModified();
239 } 261 }
240 GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; 262 GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
241 for (auto& overlap : overlaps) { 263 for (auto& overlap : overlaps) {
242 Unregister(overlap); 264 Unregister(overlap);
243 } 265 }
@@ -250,7 +272,7 @@ private:
250 return new_map; 272 return new_map;
251 } 273 }
252 274
253 void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end, 275 void UpdateBlock(const TBuffer& block, VAddr start, VAddr end,
254 std::vector<MapInterval>& overlaps) { 276 std::vector<MapInterval>& overlaps) {
255 const IntervalType base_interval{start, end}; 277 const IntervalType base_interval{start, end};
256 IntervalSet interval_set{}; 278 IntervalSet interval_set{};
@@ -262,13 +284,15 @@ private:
262 for (auto& interval : interval_set) { 284 for (auto& interval : interval_set) {
263 std::size_t size = interval.upper() - interval.lower(); 285 std::size_t size = interval.upper() - interval.lower();
264 if (size > 0) { 286 if (size > 0) {
265 u8* host_ptr = FromCacheAddr(interval.lower()); 287 staging_buffer.resize(size);
266 UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr); 288 system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
289 UploadBlockData(block, block->GetOffset(interval.lower()), size,
290 staging_buffer.data());
267 } 291 }
268 } 292 }
269 } 293 }
270 294
271 std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) { 295 std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) {
272 if (size == 0) { 296 if (size == 0) {
273 return {}; 297 return {};
274 } 298 }
@@ -290,8 +314,9 @@ private:
290 void FlushMap(MapInterval map) { 314 void FlushMap(MapInterval map) {
291 std::size_t size = map->GetEnd() - map->GetStart(); 315 std::size_t size = map->GetEnd() - map->GetStart();
292 TBuffer block = blocks[map->GetStart() >> block_page_bits]; 316 TBuffer block = blocks[map->GetStart() >> block_page_bits];
293 u8* host_ptr = FromCacheAddr(map->GetStart()); 317 staging_buffer.resize(size);
294 DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr); 318 DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
319 system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
295 map->MarkAsModified(false, 0); 320 map->MarkAsModified(false, 0);
296 } 321 }
297 322
@@ -316,14 +341,14 @@ private:
316 TBuffer EnlargeBlock(TBuffer buffer) { 341 TBuffer EnlargeBlock(TBuffer buffer) {
317 const std::size_t old_size = buffer->GetSize(); 342 const std::size_t old_size = buffer->GetSize();
318 const std::size_t new_size = old_size + block_page_size; 343 const std::size_t new_size = old_size + block_page_size;
319 const CacheAddr cache_addr = buffer->GetCacheAddr(); 344 const VAddr cpu_addr = buffer->GetCpuAddr();
320 TBuffer new_buffer = CreateBlock(cache_addr, new_size); 345 TBuffer new_buffer = CreateBlock(cpu_addr, new_size);
321 CopyBlock(buffer, new_buffer, 0, 0, old_size); 346 CopyBlock(buffer, new_buffer, 0, 0, old_size);
322 buffer->SetEpoch(epoch); 347 buffer->SetEpoch(epoch);
323 pending_destruction.push_back(buffer); 348 pending_destruction.push_back(buffer);
324 const CacheAddr cache_addr_end = cache_addr + new_size - 1; 349 const VAddr cpu_addr_end = cpu_addr + new_size - 1;
325 u64 page_start = cache_addr >> block_page_bits; 350 u64 page_start = cpu_addr >> block_page_bits;
326 const u64 page_end = cache_addr_end >> block_page_bits; 351 const u64 page_end = cpu_addr_end >> block_page_bits;
327 while (page_start <= page_end) { 352 while (page_start <= page_end) {
328 blocks[page_start] = new_buffer; 353 blocks[page_start] = new_buffer;
329 ++page_start; 354 ++page_start;
@@ -334,9 +359,9 @@ private:
334 TBuffer MergeBlocks(TBuffer first, TBuffer second) { 359 TBuffer MergeBlocks(TBuffer first, TBuffer second) {
335 const std::size_t size_1 = first->GetSize(); 360 const std::size_t size_1 = first->GetSize();
336 const std::size_t size_2 = second->GetSize(); 361 const std::size_t size_2 = second->GetSize();
337 const CacheAddr first_addr = first->GetCacheAddr(); 362 const VAddr first_addr = first->GetCpuAddr();
338 const CacheAddr second_addr = second->GetCacheAddr(); 363 const VAddr second_addr = second->GetCpuAddr();
339 const CacheAddr new_addr = std::min(first_addr, second_addr); 364 const VAddr new_addr = std::min(first_addr, second_addr);
340 const std::size_t new_size = size_1 + size_2; 365 const std::size_t new_size = size_1 + size_2;
341 TBuffer new_buffer = CreateBlock(new_addr, new_size); 366 TBuffer new_buffer = CreateBlock(new_addr, new_size);
342 CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); 367 CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
@@ -345,9 +370,9 @@ private:
345 second->SetEpoch(epoch); 370 second->SetEpoch(epoch);
346 pending_destruction.push_back(first); 371 pending_destruction.push_back(first);
347 pending_destruction.push_back(second); 372 pending_destruction.push_back(second);
348 const CacheAddr cache_addr_end = new_addr + new_size - 1; 373 const VAddr cpu_addr_end = new_addr + new_size - 1;
349 u64 page_start = new_addr >> block_page_bits; 374 u64 page_start = new_addr >> block_page_bits;
350 const u64 page_end = cache_addr_end >> block_page_bits; 375 const u64 page_end = cpu_addr_end >> block_page_bits;
351 while (page_start <= page_end) { 376 while (page_start <= page_end) {
352 blocks[page_start] = new_buffer; 377 blocks[page_start] = new_buffer;
353 ++page_start; 378 ++page_start;
@@ -355,18 +380,18 @@ private:
355 return new_buffer; 380 return new_buffer;
356 } 381 }
357 382
358 TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) { 383 TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
359 TBuffer found{}; 384 TBuffer found{};
360 const CacheAddr cache_addr_end = cache_addr + size - 1; 385 const VAddr cpu_addr_end = cpu_addr + size - 1;
361 u64 page_start = cache_addr >> block_page_bits; 386 u64 page_start = cpu_addr >> block_page_bits;
362 const u64 page_end = cache_addr_end >> block_page_bits; 387 const u64 page_end = cpu_addr_end >> block_page_bits;
363 while (page_start <= page_end) { 388 while (page_start <= page_end) {
364 auto it = blocks.find(page_start); 389 auto it = blocks.find(page_start);
365 if (it == blocks.end()) { 390 if (it == blocks.end()) {
366 if (found) { 391 if (found) {
367 found = EnlargeBlock(found); 392 found = EnlargeBlock(found);
368 } else { 393 } else {
369 const CacheAddr start_addr = (page_start << block_page_bits); 394 const VAddr start_addr = (page_start << block_page_bits);
370 found = CreateBlock(start_addr, block_page_size); 395 found = CreateBlock(start_addr, block_page_size);
371 blocks[page_start] = found; 396 blocks[page_start] = found;
372 } 397 }
@@ -386,7 +411,7 @@ private:
386 return found; 411 return found;
387 } 412 }
388 413
389 void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { 414 void MarkRegionAsWritten(const VAddr start, const VAddr end) {
390 u64 page_start = start >> write_page_bit; 415 u64 page_start = start >> write_page_bit;
391 const u64 page_end = end >> write_page_bit; 416 const u64 page_end = end >> write_page_bit;
392 while (page_start <= page_end) { 417 while (page_start <= page_end) {
@@ -400,7 +425,7 @@ private:
400 } 425 }
401 } 426 }
402 427
403 void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { 428 void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
404 u64 page_start = start >> write_page_bit; 429 u64 page_start = start >> write_page_bit;
405 const u64 page_end = end >> write_page_bit; 430 const u64 page_end = end >> write_page_bit;
406 while (page_start <= page_end) { 431 while (page_start <= page_end) {
@@ -416,7 +441,7 @@ private:
416 } 441 }
417 } 442 }
418 443
419 bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const { 444 bool IsRegionWritten(const VAddr start, const VAddr end) const {
420 u64 page_start = start >> write_page_bit; 445 u64 page_start = start >> write_page_bit;
421 const u64 page_end = end >> write_page_bit; 446 const u64 page_end = end >> write_page_bit;
422 while (page_start <= page_end) { 447 while (page_start <= page_end) {
@@ -440,8 +465,8 @@ private:
440 u64 buffer_offset = 0; 465 u64 buffer_offset = 0;
441 u64 buffer_offset_base = 0; 466 u64 buffer_offset_base = 0;
442 467
443 using IntervalSet = boost::icl::interval_set<CacheAddr>; 468 using IntervalSet = boost::icl::interval_set<VAddr>;
444 using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>; 469 using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>;
445 using IntervalType = typename IntervalCache::interval_type; 470 using IntervalType = typename IntervalCache::interval_type;
446 IntervalCache mapped_addresses; 471 IntervalCache mapped_addresses;
447 472
@@ -456,6 +481,8 @@ private:
456 u64 epoch = 0; 481 u64 epoch = 0;
457 u64 modified_ticks = 0; 482 u64 modified_ticks = 0;
458 483
484 std::vector<u8> staging_buffer;
485
459 std::recursive_mutex mutex; 486 std::recursive_mutex mutex;
460}; 487};
461 488
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
index 3a104d5cd..b0956029d 100644
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -11,7 +11,7 @@ namespace VideoCommon {
11 11
12class MapIntervalBase { 12class MapIntervalBase {
13public: 13public:
14 MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) 14 MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr)
15 : start{start}, end{end}, gpu_addr{gpu_addr} {} 15 : start{start}, end{end}, gpu_addr{gpu_addr} {}
16 16
17 void SetCpuAddress(VAddr new_cpu_addr) { 17 void SetCpuAddress(VAddr new_cpu_addr) {
@@ -26,7 +26,7 @@ public:
26 return gpu_addr; 26 return gpu_addr;
27 } 27 }
28 28
29 bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { 29 bool IsInside(const VAddr other_start, const VAddr other_end) const {
30 return (start <= other_start && other_end <= end); 30 return (start <= other_start && other_end <= end);
31 } 31 }
32 32
@@ -46,11 +46,11 @@ public:
46 return is_registered; 46 return is_registered;
47 } 47 }
48 48
49 CacheAddr GetStart() const { 49 VAddr GetStart() const {
50 return start; 50 return start;
51 } 51 }
52 52
53 CacheAddr GetEnd() const { 53 VAddr GetEnd() const {
54 return end; 54 return end;
55 } 55 }
56 56
@@ -76,8 +76,8 @@ public:
76 } 76 }
77 77
78private: 78private:
79 CacheAddr start; 79 VAddr start;
80 CacheAddr end; 80 VAddr end;
81 GPUVAddr gpu_addr; 81 GPUVAddr gpu_addr;
82 VAddr cpu_addr{}; 82 VAddr cpu_addr{};
83 bool is_written{}; 83 bool is_written{};
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 930b605af..498936f0c 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1712,6 +1712,7 @@ public:
1712 BRK, 1712 BRK,
1713 DEPBAR, 1713 DEPBAR,
1714 VOTE, 1714 VOTE,
1715 VOTE_VTG,
1715 SHFL, 1716 SHFL,
1716 FSWZADD, 1717 FSWZADD,
1717 BFE_C, 1718 BFE_C,
@@ -1758,6 +1759,7 @@ public:
1758 IPA, 1759 IPA,
1759 OUT_R, // Emit vertex/primitive 1760 OUT_R, // Emit vertex/primitive
1760 ISBERD, 1761 ISBERD,
1762 BAR,
1761 MEMBAR, 1763 MEMBAR,
1762 VMAD, 1764 VMAD,
1763 VSETP, 1765 VSETP,
@@ -1842,7 +1844,7 @@ public:
1842 MOV_C, 1844 MOV_C,
1843 MOV_R, 1845 MOV_R,
1844 MOV_IMM, 1846 MOV_IMM,
1845 MOV_SYS, 1847 S2R,
1846 MOV32_IMM, 1848 MOV32_IMM,
1847 SHL_C, 1849 SHL_C,
1848 SHL_R, 1850 SHL_R,
@@ -2026,6 +2028,7 @@ private:
2026 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), 2028 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
2027 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), 2029 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
2028 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), 2030 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
2031 INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"),
2029 INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), 2032 INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
2030 INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), 2033 INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),
2031 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), 2034 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
@@ -2063,6 +2066,7 @@ private:
2063 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), 2066 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
2064 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), 2067 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
2065 INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), 2068 INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
2069 INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"),
2066 INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"), 2070 INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
2067 INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), 2071 INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
2068 INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), 2072 INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
@@ -2134,7 +2138,7 @@ private:
2134 INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), 2138 INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
2135 INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), 2139 INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
2136 INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"), 2140 INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
2137 INST("1111000011001---", Id::MOV_SYS, Type::Trivial, "MOV_SYS"), 2141 INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"),
2138 INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"), 2142 INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
2139 INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"), 2143 INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
2140 INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"), 2144 INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index bc80661d8..72e2a33d5 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -4,6 +4,9 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <optional>
9
7#include "common/bit_field.h" 10#include "common/bit_field.h"
8#include "common/common_funcs.h" 11#include "common/common_funcs.h"
9#include "common/common_types.h" 12#include "common/common_types.h"
@@ -16,7 +19,7 @@ enum class OutputTopology : u32 {
16 TriangleStrip = 7, 19 TriangleStrip = 7,
17}; 20};
18 21
19enum class AttributeUse : u8 { 22enum class PixelImap : u8 {
20 Unused = 0, 23 Unused = 0,
21 Constant = 1, 24 Constant = 1,
22 Perspective = 2, 25 Perspective = 2,
@@ -24,7 +27,7 @@ enum class AttributeUse : u8 {
24}; 27};
25 28
26// Documentation in: 29// Documentation in:
27// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture 30// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
28struct Header { 31struct Header {
29 union { 32 union {
30 BitField<0, 5, u32> sph_type; 33 BitField<0, 5, u32> sph_type;
@@ -59,8 +62,8 @@ struct Header {
59 union { 62 union {
60 BitField<0, 12, u32> max_output_vertices; 63 BitField<0, 12, u32> max_output_vertices;
61 BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. 64 BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
62 BitField<24, 4, u32> reserved; 65 BitField<20, 4, u32> reserved;
63 BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders. 66 BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
64 } common4{}; 67 } common4{};
65 68
66 union { 69 union {
@@ -93,17 +96,20 @@ struct Header {
93 struct { 96 struct {
94 INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA 97 INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA
95 INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB 98 INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB
99
96 union { 100 union {
97 BitField<0, 2, AttributeUse> x; 101 BitField<0, 2, PixelImap> x;
98 BitField<2, 2, AttributeUse> y; 102 BitField<2, 2, PixelImap> y;
99 BitField<4, 2, AttributeUse> w; 103 BitField<4, 2, PixelImap> z;
100 BitField<6, 2, AttributeUse> z; 104 BitField<6, 2, PixelImap> w;
101 u8 raw; 105 u8 raw;
102 } imap_generic_vector[32]; 106 } imap_generic_vector[32];
107
103 INSERT_UNION_PADDING_BYTES(2); // ImapColor 108 INSERT_UNION_PADDING_BYTES(2); // ImapColor
104 INSERT_UNION_PADDING_BYTES(2); // ImapSystemValuesC 109 INSERT_UNION_PADDING_BYTES(2); // ImapSystemValuesC
105 INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10] 110 INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10]
106 INSERT_UNION_PADDING_BYTES(2); // ImapReserved 111 INSERT_UNION_PADDING_BYTES(2); // ImapReserved
112
107 struct { 113 struct {
108 u32 target; 114 u32 target;
109 union { 115 union {
@@ -112,31 +118,30 @@ struct Header {
112 BitField<2, 30, u32> reserved; 118 BitField<2, 30, u32> reserved;
113 }; 119 };
114 } omap; 120 } omap;
121
115 bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { 122 bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
116 const u32 bit = render_target * 4 + component; 123 const u32 bit = render_target * 4 + component;
117 return omap.target & (1 << bit); 124 return omap.target & (1 << bit);
118 } 125 }
119 AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const { 126
120 return static_cast<AttributeUse>( 127 PixelImap GetPixelImap(u32 attribute) const {
121 (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03); 128 const auto get_index = [this, attribute](u32 index) {
122 } 129 return static_cast<PixelImap>(
123 AttributeUse GetAttributeUse(u32 attribute) const { 130 (imap_generic_vector[attribute].raw >> (index * 2)) & 3);
124 AttributeUse result = AttributeUse::Unused; 131 };
125 for (u32 i = 0; i < 4; i++) { 132
126 const auto index = GetAttributeIndexUse(attribute, i); 133 std::optional<PixelImap> result;
127 if (index == AttributeUse::Unused) { 134 for (u32 component = 0; component < 4; ++component) {
128 continue; 135 const PixelImap index = get_index(component);
129 } 136 if (index == PixelImap::Unused) {
130 if (result == AttributeUse::Unused || result == index) {
131 result = index;
132 continue; 137 continue;
133 } 138 }
134 LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode"); 139 if (result && result != index) {
135 if (index == AttributeUse::Perspective) { 140 LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode");
136 result = index;
137 } 141 }
142 result = index;
138 } 143 }
139 return result; 144 return result.value_or(PixelImap::Unused);
140 } 145 }
141 } ps; 146 } ps;
142 147
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index ced9d7e28..1a2d747be 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -270,13 +270,13 @@ public:
270 virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; 270 virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
271 271
272 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 272 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
273 virtual void FlushRegion(CacheAddr addr, u64 size) = 0; 273 virtual void FlushRegion(VAddr addr, u64 size) = 0;
274 274
275 /// Notify rasterizer that any caches of the specified region should be invalidated 275 /// Notify rasterizer that any caches of the specified region should be invalidated
276 virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; 276 virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
277 277
278 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 278 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
279 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; 279 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
280 280
281protected: 281protected:
282 virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0; 282 virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 925be8d7b..cc434faf7 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -30,15 +30,15 @@ void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
30 gpu_thread.SwapBuffers(framebuffer); 30 gpu_thread.SwapBuffers(framebuffer);
31} 31}
32 32
33void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) { 33void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
34 gpu_thread.FlushRegion(addr, size); 34 gpu_thread.FlushRegion(addr, size);
35} 35}
36 36
37void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) { 37void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
38 gpu_thread.InvalidateRegion(addr, size); 38 gpu_thread.InvalidateRegion(addr, size);
39} 39}
40 40
41void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { 41void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
42 gpu_thread.FlushAndInvalidateRegion(addr, size); 42 gpu_thread.FlushAndInvalidateRegion(addr, size);
43} 43}
44 44
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 265c62758..03fd0eef0 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -27,9 +27,9 @@ public:
27 void Start() override; 27 void Start() override;
28 void PushGPUEntries(Tegra::CommandList&& entries) override; 28 void PushGPUEntries(Tegra::CommandList&& entries) override;
29 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; 29 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
30 void FlushRegion(CacheAddr addr, u64 size) override; 30 void FlushRegion(VAddr addr, u64 size) override;
31 void InvalidateRegion(CacheAddr addr, u64 size) override; 31 void InvalidateRegion(VAddr addr, u64 size) override;
32 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 32 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
33 void WaitIdle() const override; 33 void WaitIdle() const override;
34 34
35protected: 35protected:
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index bd5278a5c..6f38a672a 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -26,15 +26,15 @@ void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
26 renderer->SwapBuffers(framebuffer); 26 renderer->SwapBuffers(framebuffer);
27} 27}
28 28
29void GPUSynch::FlushRegion(CacheAddr addr, u64 size) { 29void GPUSynch::FlushRegion(VAddr addr, u64 size) {
30 renderer->Rasterizer().FlushRegion(addr, size); 30 renderer->Rasterizer().FlushRegion(addr, size);
31} 31}
32 32
33void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) { 33void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
34 renderer->Rasterizer().InvalidateRegion(addr, size); 34 renderer->Rasterizer().InvalidateRegion(addr, size);
35} 35}
36 36
37void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { 37void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
38 renderer->Rasterizer().FlushAndInvalidateRegion(addr, size); 38 renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
39} 39}
40 40
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 866a94c8c..4a6e9a01d 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -26,9 +26,9 @@ public:
26 void Start() override; 26 void Start() override;
27 void PushGPUEntries(Tegra::CommandList&& entries) override; 27 void PushGPUEntries(Tegra::CommandList&& entries) override;
28 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; 28 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
29 void FlushRegion(CacheAddr addr, u64 size) override; 29 void FlushRegion(VAddr addr, u64 size) override;
30 void InvalidateRegion(CacheAddr addr, u64 size) override; 30 void InvalidateRegion(VAddr addr, u64 size) override;
31 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 31 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
32 void WaitIdle() const override {} 32 void WaitIdle() const override {}
33 33
34protected: 34protected:
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 270c7ae0d..10cda686b 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -77,15 +77,15 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
77 PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); 77 PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
78} 78}
79 79
80void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { 80void ThreadManager::FlushRegion(VAddr addr, u64 size) {
81 PushCommand(FlushRegionCommand(addr, size)); 81 PushCommand(FlushRegionCommand(addr, size));
82} 82}
83 83
84void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { 84void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
85 system.Renderer().Rasterizer().InvalidateRegion(addr, size); 85 system.Renderer().Rasterizer().InvalidateRegion(addr, size);
86} 86}
87 87
88void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { 88void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
89 // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important 89 // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
90 InvalidateRegion(addr, size); 90 InvalidateRegion(addr, size);
91} 91}
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index be36c580e..cd74ad330 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -47,26 +47,26 @@ struct SwapBuffersCommand final {
47 47
48/// Command to signal to the GPU thread to flush a region 48/// Command to signal to the GPU thread to flush a region
49struct FlushRegionCommand final { 49struct FlushRegionCommand final {
50 explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} 50 explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
51 51
52 CacheAddr addr; 52 VAddr addr;
53 u64 size; 53 u64 size;
54}; 54};
55 55
56/// Command to signal to the GPU thread to invalidate a region 56/// Command to signal to the GPU thread to invalidate a region
57struct InvalidateRegionCommand final { 57struct InvalidateRegionCommand final {
58 explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} 58 explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
59 59
60 CacheAddr addr; 60 VAddr addr;
61 u64 size; 61 u64 size;
62}; 62};
63 63
64/// Command to signal to the GPU thread to flush and invalidate a region 64/// Command to signal to the GPU thread to flush and invalidate a region
65struct FlushAndInvalidateRegionCommand final { 65struct FlushAndInvalidateRegionCommand final {
66 explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size) 66 explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
67 : addr{addr}, size{size} {} 67 : addr{addr}, size{size} {}
68 68
69 CacheAddr addr; 69 VAddr addr;
70 u64 size; 70 u64 size;
71}; 71};
72 72
@@ -111,13 +111,13 @@ public:
111 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); 111 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
112 112
113 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 113 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
114 void FlushRegion(CacheAddr addr, u64 size); 114 void FlushRegion(VAddr addr, u64 size);
115 115
116 /// Notify rasterizer that any caches of the specified region should be invalidated 116 /// Notify rasterizer that any caches of the specified region should be invalidated
117 void InvalidateRegion(CacheAddr addr, u64 size); 117 void InvalidateRegion(VAddr addr, u64 size);
118 118
119 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 119 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
120 void FlushAndInvalidateRegion(CacheAddr addr, u64 size); 120 void FlushAndInvalidateRegion(VAddr addr, u64 size);
121 121
122 // Wait until the gpu thread is idle. 122 // Wait until the gpu thread is idle.
123 void WaitIdle() const; 123 void WaitIdle() const;
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index f5d33f27a..a3389d0d2 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -81,12 +81,11 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
81 ASSERT((gpu_addr & page_mask) == 0); 81 ASSERT((gpu_addr & page_mask) == 0);
82 82
83 const u64 aligned_size{Common::AlignUp(size, page_size)}; 83 const u64 aligned_size{Common::AlignUp(size, page_size)};
84 const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
85 const auto cpu_addr = GpuToCpuAddress(gpu_addr); 84 const auto cpu_addr = GpuToCpuAddress(gpu_addr);
86 ASSERT(cpu_addr); 85 ASSERT(cpu_addr);
87 86
88 // Flush and invalidate through the GPU interface, to be asynchronous if possible. 87 // Flush and invalidate through the GPU interface, to be asynchronous if possible.
89 system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size); 88 system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size);
90 89
91 UnmapRange(gpu_addr, aligned_size); 90 UnmapRange(gpu_addr, aligned_size);
92 ASSERT(system.CurrentProcess() 91 ASSERT(system.CurrentProcess()
@@ -140,11 +139,11 @@ T MemoryManager::Read(GPUVAddr addr) const {
140 return {}; 139 return {};
141 } 140 }
142 141
143 const u8* page_pointer{page_table.pointers[addr >> page_bits]}; 142 const u8* page_pointer{GetPointer(addr)};
144 if (page_pointer) { 143 if (page_pointer) {
145 // NOTE: Avoid adding any extra logic to this fast-path block 144 // NOTE: Avoid adding any extra logic to this fast-path block
146 T value; 145 T value;
147 std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T)); 146 std::memcpy(&value, page_pointer, sizeof(T));
148 return value; 147 return value;
149 } 148 }
150 149
@@ -167,10 +166,10 @@ void MemoryManager::Write(GPUVAddr addr, T data) {
167 return; 166 return;
168 } 167 }
169 168
170 u8* page_pointer{page_table.pointers[addr >> page_bits]}; 169 u8* page_pointer{GetPointer(addr)};
171 if (page_pointer) { 170 if (page_pointer) {
172 // NOTE: Avoid adding any extra logic to this fast-path block 171 // NOTE: Avoid adding any extra logic to this fast-path block
173 std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T)); 172 std::memcpy(page_pointer, &data, sizeof(T));
174 return; 173 return;
175 } 174 }
176 175
@@ -201,9 +200,12 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) {
201 return {}; 200 return {};
202 } 201 }
203 202
204 u8* const page_pointer{page_table.pointers[addr >> page_bits]}; 203 auto& memory = system.Memory();
205 if (page_pointer != nullptr) { 204
206 return page_pointer + (addr & page_mask); 205 const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
206
207 if (page_addr != 0) {
208 return memory.GetPointer(page_addr + (addr & page_mask));
207 } 209 }
208 210
209 LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); 211 LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
@@ -215,9 +217,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
215 return {}; 217 return {};
216 } 218 }
217 219
218 const u8* const page_pointer{page_table.pointers[addr >> page_bits]}; 220 const auto& memory = system.Memory();
219 if (page_pointer != nullptr) { 221
220 return page_pointer + (addr & page_mask); 222 const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
223
224 if (page_addr != 0) {
225 return memory.GetPointer(page_addr + (addr & page_mask));
221 } 226 }
222 227
223 LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); 228 LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
@@ -238,17 +243,19 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
238 std::size_t page_index{src_addr >> page_bits}; 243 std::size_t page_index{src_addr >> page_bits};
239 std::size_t page_offset{src_addr & page_mask}; 244 std::size_t page_offset{src_addr & page_mask};
240 245
246 auto& memory = system.Memory();
247
241 while (remaining_size > 0) { 248 while (remaining_size > 0) {
242 const std::size_t copy_amount{ 249 const std::size_t copy_amount{
243 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; 250 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
244 251
245 switch (page_table.attributes[page_index]) { 252 switch (page_table.attributes[page_index]) {
246 case Common::PageType::Memory: { 253 case Common::PageType::Memory: {
247 const u8* src_ptr{page_table.pointers[page_index] + page_offset}; 254 const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
248 // Flush must happen on the rasterizer interface, such that memory is always synchronous 255 // Flush must happen on the rasterizer interface, such that memory is always synchronous
249 // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. 256 // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
250 rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); 257 rasterizer.FlushRegion(src_addr, copy_amount);
251 std::memcpy(dest_buffer, src_ptr, copy_amount); 258 memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
252 break; 259 break;
253 } 260 }
254 default: 261 default:
@@ -268,13 +275,15 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
268 std::size_t page_index{src_addr >> page_bits}; 275 std::size_t page_index{src_addr >> page_bits};
269 std::size_t page_offset{src_addr & page_mask}; 276 std::size_t page_offset{src_addr & page_mask};
270 277
278 auto& memory = system.Memory();
279
271 while (remaining_size > 0) { 280 while (remaining_size > 0) {
272 const std::size_t copy_amount{ 281 const std::size_t copy_amount{
273 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; 282 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
274 const u8* page_pointer = page_table.pointers[page_index]; 283 const u8* page_pointer = page_table.pointers[page_index];
275 if (page_pointer) { 284 if (page_pointer) {
276 const u8* src_ptr{page_pointer + page_offset}; 285 const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
277 std::memcpy(dest_buffer, src_ptr, copy_amount); 286 memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
278 } else { 287 } else {
279 std::memset(dest_buffer, 0, copy_amount); 288 std::memset(dest_buffer, 0, copy_amount);
280 } 289 }
@@ -290,17 +299,19 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
290 std::size_t page_index{dest_addr >> page_bits}; 299 std::size_t page_index{dest_addr >> page_bits};
291 std::size_t page_offset{dest_addr & page_mask}; 300 std::size_t page_offset{dest_addr & page_mask};
292 301
302 auto& memory = system.Memory();
303
293 while (remaining_size > 0) { 304 while (remaining_size > 0) {
294 const std::size_t copy_amount{ 305 const std::size_t copy_amount{
295 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; 306 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
296 307
297 switch (page_table.attributes[page_index]) { 308 switch (page_table.attributes[page_index]) {
298 case Common::PageType::Memory: { 309 case Common::PageType::Memory: {
299 u8* dest_ptr{page_table.pointers[page_index] + page_offset}; 310 const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
300 // Invalidate must happen on the rasterizer interface, such that memory is always 311 // Invalidate must happen on the rasterizer interface, such that memory is always
301 // synchronous when it is written (even when in asynchronous GPU mode). 312 // synchronous when it is written (even when in asynchronous GPU mode).
302 rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); 313 rasterizer.InvalidateRegion(dest_addr, copy_amount);
303 std::memcpy(dest_ptr, src_buffer, copy_amount); 314 memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
304 break; 315 break;
305 } 316 }
306 default: 317 default:
@@ -320,13 +331,15 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
320 std::size_t page_index{dest_addr >> page_bits}; 331 std::size_t page_index{dest_addr >> page_bits};
321 std::size_t page_offset{dest_addr & page_mask}; 332 std::size_t page_offset{dest_addr & page_mask};
322 333
334 auto& memory = system.Memory();
335
323 while (remaining_size > 0) { 336 while (remaining_size > 0) {
324 const std::size_t copy_amount{ 337 const std::size_t copy_amount{
325 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; 338 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
326 u8* page_pointer = page_table.pointers[page_index]; 339 u8* page_pointer = page_table.pointers[page_index];
327 if (page_pointer) { 340 if (page_pointer) {
328 u8* dest_ptr{page_pointer + page_offset}; 341 const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
329 std::memcpy(dest_ptr, src_buffer, copy_amount); 342 memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
330 } 343 }
331 page_index++; 344 page_index++;
332 page_offset = 0; 345 page_offset = 0;
@@ -336,33 +349,9 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
336} 349}
337 350
338void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { 351void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
339 std::size_t remaining_size{size}; 352 std::vector<u8> tmp_buffer(size);
340 std::size_t page_index{src_addr >> page_bits}; 353 ReadBlock(src_addr, tmp_buffer.data(), size);
341 std::size_t page_offset{src_addr & page_mask}; 354 WriteBlock(dest_addr, tmp_buffer.data(), size);
342
343 while (remaining_size > 0) {
344 const std::size_t copy_amount{
345 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
346
347 switch (page_table.attributes[page_index]) {
348 case Common::PageType::Memory: {
349 // Flush must happen on the rasterizer interface, such that memory is always synchronous
350 // when it is copied (even when in asynchronous GPU mode).
351 const u8* src_ptr{page_table.pointers[page_index] + page_offset};
352 rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
353 WriteBlock(dest_addr, src_ptr, copy_amount);
354 break;
355 }
356 default:
357 UNREACHABLE();
358 }
359
360 page_index++;
361 page_offset = 0;
362 dest_addr += static_cast<VAddr>(copy_amount);
363 src_addr += static_cast<VAddr>(copy_amount);
364 remaining_size -= copy_amount;
365 }
366} 355}
367 356
368void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { 357void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
@@ -371,6 +360,12 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const
371 WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); 360 WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
372} 361}
373 362
363bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
364 const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits];
365 const std::size_t page = (addr & Memory::PAGE_MASK) + size;
366 return page <= Memory::PAGE_SIZE;
367}
368
374void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, 369void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
375 VAddr backing_addr) { 370 VAddr backing_addr) {
376 LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, 371 LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 073bdb491..0d9468535 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -97,6 +97,11 @@ public:
97 void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); 97 void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
98 void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); 98 void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
99 99
100 /**
101 * IsGranularRange checks if a gpu region can be simply read with a pointer
102 */
103 bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size);
104
100private: 105private:
101 using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; 106 using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
102 using VMAHandle = VMAMap::const_iterator; 107 using VMAHandle = VMAMap::const_iterator;
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index e66054ed0..5ea2b01f2 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -98,12 +98,12 @@ public:
98 static_cast<QueryCache&>(*this), 98 static_cast<QueryCache&>(*this),
99 VideoCore::QueryType::SamplesPassed}}} {} 99 VideoCore::QueryType::SamplesPassed}}} {}
100 100
101 void InvalidateRegion(CacheAddr addr, std::size_t size) { 101 void InvalidateRegion(VAddr addr, std::size_t size) {
102 std::unique_lock lock{mutex}; 102 std::unique_lock lock{mutex};
103 FlushAndRemoveRegion(addr, size); 103 FlushAndRemoveRegion(addr, size);
104 } 104 }
105 105
106 void FlushRegion(CacheAddr addr, std::size_t size) { 106 void FlushRegion(VAddr addr, std::size_t size) {
107 std::unique_lock lock{mutex}; 107 std::unique_lock lock{mutex};
108 FlushAndRemoveRegion(addr, size); 108 FlushAndRemoveRegion(addr, size);
109 } 109 }
@@ -117,14 +117,16 @@ public:
117 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { 117 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
118 std::unique_lock lock{mutex}; 118 std::unique_lock lock{mutex};
119 auto& memory_manager = system.GPU().MemoryManager(); 119 auto& memory_manager = system.GPU().MemoryManager();
120 const auto host_ptr = memory_manager.GetPointer(gpu_addr); 120 const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
121 ASSERT(cpu_addr_opt);
122 VAddr cpu_addr = *cpu_addr_opt;
121 123
122 CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); 124 CachedQuery* query = TryGet(cpu_addr);
123 if (!query) { 125 if (!query) {
124 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); 126 ASSERT_OR_EXECUTE(cpu_addr_opt, return;);
125 ASSERT_OR_EXECUTE(cpu_addr, return;); 127 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
126 128
127 query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); 129 query = Register(type, cpu_addr, host_ptr, timestamp.has_value());
128 } 130 }
129 131
130 query->BindCounter(Stream(type).Current(), timestamp); 132 query->BindCounter(Stream(type).Current(), timestamp);
@@ -173,11 +175,11 @@ protected:
173 175
174private: 176private:
175 /// Flushes a memory range to guest memory and removes it from the cache. 177 /// Flushes a memory range to guest memory and removes it from the cache.
176 void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { 178 void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
177 const u64 addr_begin = static_cast<u64>(addr); 179 const u64 addr_begin = static_cast<u64>(addr);
178 const u64 addr_end = addr_begin + static_cast<u64>(size); 180 const u64 addr_end = addr_begin + static_cast<u64>(size);
179 const auto in_range = [addr_begin, addr_end](CachedQuery& query) { 181 const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
180 const u64 cache_begin = query.GetCacheAddr(); 182 const u64 cache_begin = query.GetCpuAddr();
181 const u64 cache_end = cache_begin + query.SizeInBytes(); 183 const u64 cache_end = cache_begin + query.SizeInBytes();
182 return cache_begin < addr_end && addr_begin < cache_end; 184 return cache_begin < addr_end && addr_begin < cache_end;
183 }; 185 };
@@ -193,7 +195,7 @@ private:
193 if (!in_range(query)) { 195 if (!in_range(query)) {
194 continue; 196 continue;
195 } 197 }
196 rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); 198 rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1);
197 query.Flush(); 199 query.Flush();
198 } 200 }
199 contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), 201 contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
@@ -204,22 +206,21 @@ private:
204 /// Registers the passed parameters as cached and returns a pointer to the stored cached query. 206 /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
205 CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { 207 CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
206 rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); 208 rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
207 const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; 209 const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT;
208 return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, 210 return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
209 host_ptr); 211 host_ptr);
210 } 212 }
211 213
212 /// Tries to a get a cached query. Returns nullptr on failure. 214 /// Tries to a get a cached query. Returns nullptr on failure.
213 CachedQuery* TryGet(CacheAddr addr) { 215 CachedQuery* TryGet(VAddr addr) {
214 const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; 216 const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
215 const auto it = cached_queries.find(page); 217 const auto it = cached_queries.find(page);
216 if (it == std::end(cached_queries)) { 218 if (it == std::end(cached_queries)) {
217 return nullptr; 219 return nullptr;
218 } 220 }
219 auto& contents = it->second; 221 auto& contents = it->second;
220 const auto found = 222 const auto found = std::find_if(std::begin(contents), std::end(contents),
221 std::find_if(std::begin(contents), std::end(contents), 223 [addr](auto& query) { return query.GetCpuAddr() == addr; });
222 [addr](auto& query) { return query.GetCacheAddr() == addr; });
223 return found != std::end(contents) ? &*found : nullptr; 224 return found != std::end(contents) ? &*found : nullptr;
224 } 225 }
225 226
@@ -323,14 +324,10 @@ public:
323 timestamp = timestamp_; 324 timestamp = timestamp_;
324 } 325 }
325 326
326 VAddr CpuAddr() const noexcept { 327 VAddr GetCpuAddr() const noexcept {
327 return cpu_addr; 328 return cpu_addr;
328 } 329 }
329 330
330 CacheAddr GetCacheAddr() const noexcept {
331 return ToCacheAddr(host_ptr);
332 }
333
334 u64 SizeInBytes() const noexcept { 331 u64 SizeInBytes() const noexcept {
335 return SizeInBytes(timestamp.has_value()); 332 return SizeInBytes(timestamp.has_value());
336 } 333 }
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index 6de1597a2..22987751e 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -18,22 +18,14 @@
18 18
19class RasterizerCacheObject { 19class RasterizerCacheObject {
20public: 20public:
21 explicit RasterizerCacheObject(const u8* host_ptr) 21 explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
22 : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
23 22
24 virtual ~RasterizerCacheObject(); 23 virtual ~RasterizerCacheObject();
25 24
26 CacheAddr GetCacheAddr() const { 25 VAddr GetCpuAddr() const {
27 return cache_addr; 26 return cpu_addr;
28 } 27 }
29 28
30 const u8* GetHostPtr() const {
31 return host_ptr;
32 }
33
34 /// Gets the address of the shader in guest memory, required for cache management
35 virtual VAddr GetCpuAddr() const = 0;
36
37 /// Gets the size of the shader in guest memory, required for cache management 29 /// Gets the size of the shader in guest memory, required for cache management
38 virtual std::size_t GetSizeInBytes() const = 0; 30 virtual std::size_t GetSizeInBytes() const = 0;
39 31
@@ -68,8 +60,7 @@ private:
68 bool is_registered{}; ///< Whether the object is currently registered with the cache 60 bool is_registered{}; ///< Whether the object is currently registered with the cache
69 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) 61 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
70 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing 62 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
71 const u8* host_ptr{}; ///< Pointer to the memory backing this cached region 63 VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space
72 CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space
73}; 64};
74 65
75template <class T> 66template <class T>
@@ -80,7 +71,7 @@ public:
80 explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} 71 explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
81 72
82 /// Write any cached resources overlapping the specified region back to memory 73 /// Write any cached resources overlapping the specified region back to memory
83 void FlushRegion(CacheAddr addr, std::size_t size) { 74 void FlushRegion(VAddr addr, std::size_t size) {
84 std::lock_guard lock{mutex}; 75 std::lock_guard lock{mutex};
85 76
86 const auto& objects{GetSortedObjectsFromRegion(addr, size)}; 77 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@@ -90,7 +81,7 @@ public:
90 } 81 }
91 82
92 /// Mark the specified region as being invalidated 83 /// Mark the specified region as being invalidated
93 void InvalidateRegion(CacheAddr addr, u64 size) { 84 void InvalidateRegion(VAddr addr, u64 size) {
94 std::lock_guard lock{mutex}; 85 std::lock_guard lock{mutex};
95 86
96 const auto& objects{GetSortedObjectsFromRegion(addr, size)}; 87 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@@ -114,27 +105,20 @@ public:
114 105
115protected: 106protected:
116 /// Tries to get an object from the cache with the specified cache address 107 /// Tries to get an object from the cache with the specified cache address
117 T TryGet(CacheAddr addr) const { 108 T TryGet(VAddr addr) const {
118 const auto iter = map_cache.find(addr); 109 const auto iter = map_cache.find(addr);
119 if (iter != map_cache.end()) 110 if (iter != map_cache.end())
120 return iter->second; 111 return iter->second;
121 return nullptr; 112 return nullptr;
122 } 113 }
123 114
124 T TryGet(const void* addr) const {
125 const auto iter = map_cache.find(ToCacheAddr(addr));
126 if (iter != map_cache.end())
127 return iter->second;
128 return nullptr;
129 }
130
131 /// Register an object into the cache 115 /// Register an object into the cache
132 virtual void Register(const T& object) { 116 virtual void Register(const T& object) {
133 std::lock_guard lock{mutex}; 117 std::lock_guard lock{mutex};
134 118
135 object->SetIsRegistered(true); 119 object->SetIsRegistered(true);
136 interval_cache.add({GetInterval(object), ObjectSet{object}}); 120 interval_cache.add({GetInterval(object), ObjectSet{object}});
137 map_cache.insert({object->GetCacheAddr(), object}); 121 map_cache.insert({object->GetCpuAddr(), object});
138 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); 122 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
139 } 123 }
140 124
@@ -144,7 +128,7 @@ protected:
144 128
145 object->SetIsRegistered(false); 129 object->SetIsRegistered(false);
146 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); 130 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
147 const CacheAddr addr = object->GetCacheAddr(); 131 const VAddr addr = object->GetCpuAddr();
148 interval_cache.subtract({GetInterval(object), ObjectSet{object}}); 132 interval_cache.subtract({GetInterval(object), ObjectSet{object}});
149 map_cache.erase(addr); 133 map_cache.erase(addr);
150 } 134 }
@@ -173,7 +157,7 @@ protected:
173 157
174private: 158private:
175 /// Returns a list of cached objects from the specified memory region, ordered by access time 159 /// Returns a list of cached objects from the specified memory region, ordered by access time
176 std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) { 160 std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
177 if (size == 0) { 161 if (size == 0) {
178 return {}; 162 return {};
179 } 163 }
@@ -197,13 +181,13 @@ private:
197 } 181 }
198 182
199 using ObjectSet = std::set<T>; 183 using ObjectSet = std::set<T>;
200 using ObjectCache = std::unordered_map<CacheAddr, T>; 184 using ObjectCache = std::unordered_map<VAddr, T>;
201 using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>; 185 using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
202 using ObjectInterval = typename IntervalCache::interval_type; 186 using ObjectInterval = typename IntervalCache::interval_type;
203 187
204 static auto GetInterval(const T& object) { 188 static auto GetInterval(const T& object) {
205 return ObjectInterval::right_open(object->GetCacheAddr(), 189 return ObjectInterval::right_open(object->GetCpuAddr(),
206 object->GetCacheAddr() + object->GetSizeInBytes()); 190 object->GetCpuAddr() + object->GetSizeInBytes());
207 } 191 }
208 192
209 ObjectCache map_cache; 193 ObjectCache map_cache;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 1a68e3caa..8ae5b9c4e 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -53,14 +53,14 @@ public:
53 virtual void FlushAll() = 0; 53 virtual void FlushAll() = 0;
54 54
55 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 55 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
56 virtual void FlushRegion(CacheAddr addr, u64 size) = 0; 56 virtual void FlushRegion(VAddr addr, u64 size) = 0;
57 57
58 /// Notify rasterizer that any caches of the specified region should be invalidated 58 /// Notify rasterizer that any caches of the specified region should be invalidated
59 virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; 59 virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
60 60
61 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 61 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
62 /// and invalidated 62 /// and invalidated
63 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; 63 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
64 64
65 /// Notify the rasterizer to send all written commands to the host GPU. 65 /// Notify the rasterizer to send all written commands to the host GPU.
66 virtual void FlushCommands() = 0; 66 virtual void FlushCommands() = 0;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 0375fca17..4eb37a96c 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -21,8 +21,8 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
21 21
22MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); 22MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
23 23
24CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) 24CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size)
25 : VideoCommon::BufferBlock{cache_addr, size} { 25 : VideoCommon::BufferBlock{cpu_addr, size} {
26 gl_buffer.Create(); 26 gl_buffer.Create();
27 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); 27 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
28} 28}
@@ -47,8 +47,8 @@ OGLBufferCache::~OGLBufferCache() {
47 glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); 47 glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
48} 48}
49 49
50Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { 50Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
51 return std::make_shared<CachedBufferBlock>(cache_addr, size); 51 return std::make_shared<CachedBufferBlock>(cpu_addr, size);
52} 52}
53 53
54void OGLBufferCache::WriteBarrier() { 54void OGLBufferCache::WriteBarrier() {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 8c7145443..d94a11252 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -31,7 +31,7 @@ using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuf
31 31
32class CachedBufferBlock : public VideoCommon::BufferBlock { 32class CachedBufferBlock : public VideoCommon::BufferBlock {
33public: 33public:
34 explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size); 34 explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);
35 ~CachedBufferBlock(); 35 ~CachedBufferBlock();
36 36
37 const GLuint* GetHandle() const { 37 const GLuint* GetHandle() const {
@@ -55,7 +55,7 @@ public:
55 } 55 }
56 56
57protected: 57protected:
58 Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; 58 Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
59 59
60 void WriteBarrier() override; 60 void WriteBarrier() override;
61 61
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 1a2e2a9f7..c286502ba 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -131,6 +131,31 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
131 return bindings; 131 return bindings;
132} 132}
133 133
134bool IsASTCSupported() {
135 static constexpr std::array formats = {
136 GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
137 GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
138 GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR,
139 GL_COMPRESSED_RGBA_ASTC_8x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x8_KHR,
140 GL_COMPRESSED_RGBA_ASTC_10x5_KHR, GL_COMPRESSED_RGBA_ASTC_10x6_KHR,
141 GL_COMPRESSED_RGBA_ASTC_10x8_KHR, GL_COMPRESSED_RGBA_ASTC_10x10_KHR,
142 GL_COMPRESSED_RGBA_ASTC_12x10_KHR, GL_COMPRESSED_RGBA_ASTC_12x12_KHR,
143 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR,
144 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR,
145 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR,
146 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR,
147 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR,
148 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
149 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
150 };
151 return std::find_if_not(formats.begin(), formats.end(), [](GLenum format) {
152 GLint supported;
153 glGetInternalformativ(GL_TEXTURE_2D, format, GL_INTERNALFORMAT_SUPPORTED, 1,
154 &supported);
155 return supported == GL_TRUE;
156 }) == formats.end();
157}
158
134} // Anonymous namespace 159} // Anonymous namespace
135 160
136Device::Device() : base_bindings{BuildBaseBindings()} { 161Device::Device() : base_bindings{BuildBaseBindings()} {
@@ -152,6 +177,7 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
152 has_shader_ballot = GLAD_GL_ARB_shader_ballot; 177 has_shader_ballot = GLAD_GL_ARB_shader_ballot;
153 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; 178 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
154 has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); 179 has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
180 has_astc = IsASTCSupported();
155 has_variable_aoffi = TestVariableAoffi(); 181 has_variable_aoffi = TestVariableAoffi();
156 has_component_indexing_bug = is_amd; 182 has_component_indexing_bug = is_amd;
157 has_precise_bug = TestPreciseBug(); 183 has_precise_bug = TestPreciseBug();
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index d73b099d0..a55050cb5 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -64,6 +64,10 @@ public:
64 return has_image_load_formatted; 64 return has_image_load_formatted;
65 } 65 }
66 66
67 bool HasASTC() const {
68 return has_astc;
69 }
70
67 bool HasVariableAoffi() const { 71 bool HasVariableAoffi() const {
68 return has_variable_aoffi; 72 return has_variable_aoffi;
69 } 73 }
@@ -97,6 +101,7 @@ private:
97 bool has_shader_ballot{}; 101 bool has_shader_ballot{};
98 bool has_vertex_viewport_layer{}; 102 bool has_vertex_viewport_layer{};
99 bool has_image_load_formatted{}; 103 bool has_image_load_formatted{};
104 bool has_astc{};
100 bool has_variable_aoffi{}; 105 bool has_variable_aoffi{};
101 bool has_component_indexing_bug{}; 106 bool has_component_indexing_bug{};
102 bool has_precise_bug{}; 107 bool has_precise_bug{};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 31add708f..368f399df 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -386,11 +386,14 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using
386 texture_cache.GuardRenderTargets(true); 386 texture_cache.GuardRenderTargets(true);
387 View color_surface; 387 View color_surface;
388 if (using_color_fb) { 388 if (using_color_fb) {
389 color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false); 389 const std::size_t index = regs.clear_buffers.RT;
390 color_surface = texture_cache.GetColorBufferSurface(index, true);
391 texture_cache.MarkColorBufferInUse(index);
390 } 392 }
391 View depth_surface; 393 View depth_surface;
392 if (using_depth_fb || using_stencil_fb) { 394 if (using_depth_fb || using_stencil_fb) {
393 depth_surface = texture_cache.GetDepthBufferSurface(false); 395 depth_surface = texture_cache.GetDepthBufferSurface(true);
396 texture_cache.MarkDepthBufferInUse();
394 } 397 }
395 texture_cache.GuardRenderTargets(false); 398 texture_cache.GuardRenderTargets(false);
396 399
@@ -653,9 +656,9 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
653 656
654void RasterizerOpenGL::FlushAll() {} 657void RasterizerOpenGL::FlushAll() {}
655 658
656void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { 659void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
657 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 660 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
658 if (!addr || !size) { 661 if (addr == 0 || size == 0) {
659 return; 662 return;
660 } 663 }
661 texture_cache.FlushRegion(addr, size); 664 texture_cache.FlushRegion(addr, size);
@@ -663,9 +666,9 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
663 query_cache.FlushRegion(addr, size); 666 query_cache.FlushRegion(addr, size);
664} 667}
665 668
666void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { 669void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
667 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 670 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
668 if (!addr || !size) { 671 if (addr == 0 || size == 0) {
669 return; 672 return;
670 } 673 }
671 texture_cache.InvalidateRegion(addr, size); 674 texture_cache.InvalidateRegion(addr, size);
@@ -674,7 +677,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
674 query_cache.InvalidateRegion(addr, size); 677 query_cache.InvalidateRegion(addr, size);
675} 678}
676 679
677void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { 680void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
678 if (Settings::values.use_accurate_gpu_emulation) { 681 if (Settings::values.use_accurate_gpu_emulation) {
679 FlushRegion(addr, size); 682 FlushRegion(addr, size);
680 } 683 }
@@ -713,8 +716,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
713 716
714 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 717 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
715 718
716 const auto surface{ 719 const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
717 texture_cache.TryFindFramebufferSurface(system.Memory().GetPointer(framebuffer_addr))};
718 if (!surface) { 720 if (!surface) {
719 return {}; 721 return {};
720 } 722 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 2d3be2437..212dad852 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -65,9 +65,9 @@ public:
65 void ResetCounter(VideoCore::QueryType type) override; 65 void ResetCounter(VideoCore::QueryType type) override;
66 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 66 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
67 void FlushAll() override; 67 void FlushAll() override;
68 void FlushRegion(CacheAddr addr, u64 size) override; 68 void FlushRegion(VAddr addr, u64 size) override;
69 void InvalidateRegion(CacheAddr addr, u64 size) override; 69 void InvalidateRegion(VAddr addr, u64 size) override;
70 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 70 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
71 void FlushCommands() override; 71 void FlushCommands() override;
72 void TickFrame() override; 72 void TickFrame() override;
73 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 73 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 046ee55a5..6d2ff20f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,11 +214,11 @@ std::unordered_set<GLenum> GetSupportedFormats() {
214 214
215} // Anonymous namespace 215} // Anonymous namespace
216 216
217CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, 217CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
218 std::shared_ptr<VideoCommon::Shader::Registry> registry, 218 std::shared_ptr<VideoCommon::Shader::Registry> registry,
219 ShaderEntries entries, std::shared_ptr<OGLProgram> program) 219 ShaderEntries entries, std::shared_ptr<OGLProgram> program)
220 : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)}, 220 : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
221 cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {} 221 size_in_bytes{size_in_bytes}, program{std::move(program)} {}
222 222
223CachedShader::~CachedShader() = default; 223CachedShader::~CachedShader() = default;
224 224
@@ -254,9 +254,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
254 entry.bindless_samplers = registry->GetBindlessSamplers(); 254 entry.bindless_samplers = registry->GetBindlessSamplers();
255 params.disk_cache.SaveEntry(std::move(entry)); 255 params.disk_cache.SaveEntry(std::move(entry));
256 256
257 return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, 257 return std::shared_ptr<CachedShader>(new CachedShader(
258 size_in_bytes, std::move(registry), 258 params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
259 MakeEntries(ir), std::move(program)));
260} 259}
261 260
262Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { 261Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
@@ -279,17 +278,16 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
279 entry.bindless_samplers = registry->GetBindlessSamplers(); 278 entry.bindless_samplers = registry->GetBindlessSamplers();
280 params.disk_cache.SaveEntry(std::move(entry)); 279 params.disk_cache.SaveEntry(std::move(entry));
281 280
282 return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, 281 return std::shared_ptr<CachedShader>(new CachedShader(
283 size_in_bytes, std::move(registry), 282 params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
284 MakeEntries(ir), std::move(program)));
285} 283}
286 284
287Shader CachedShader::CreateFromCache(const ShaderParameters& params, 285Shader CachedShader::CreateFromCache(const ShaderParameters& params,
288 const PrecompiledShader& precompiled_shader, 286 const PrecompiledShader& precompiled_shader,
289 std::size_t size_in_bytes) { 287 std::size_t size_in_bytes) {
290 return std::shared_ptr<CachedShader>(new CachedShader( 288 return std::shared_ptr<CachedShader>(
291 params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry, 289 new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
292 precompiled_shader.entries, precompiled_shader.program)); 290 precompiled_shader.entries, precompiled_shader.program));
293} 291}
294 292
295ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, 293ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -449,12 +447,14 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
449 const GPUVAddr address{GetShaderAddress(system, program)}; 447 const GPUVAddr address{GetShaderAddress(system, program)};
450 448
451 // Look up shader in the cache based on address 449 // Look up shader in the cache based on address
452 const auto host_ptr{memory_manager.GetPointer(address)}; 450 const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
453 Shader shader{TryGet(host_ptr)}; 451 Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr};
454 if (shader) { 452 if (shader) {
455 return last_shaders[static_cast<std::size_t>(program)] = shader; 453 return last_shaders[static_cast<std::size_t>(program)] = shader;
456 } 454 }
457 455
456 const auto host_ptr{memory_manager.GetPointer(address)};
457
458 // No shader found - create a new one 458 // No shader found - create a new one
459 ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; 459 ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
460 ProgramCode code_b; 460 ProgramCode code_b;
@@ -465,9 +465,9 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
465 465
466 const auto unique_identifier = GetUniqueIdentifier( 466 const auto unique_identifier = GetUniqueIdentifier(
467 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); 467 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
468 const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; 468
469 const ShaderParameters params{system, disk_cache, device, 469 const ShaderParameters params{system, disk_cache, device,
470 cpu_addr, host_ptr, unique_identifier}; 470 *cpu_addr, host_ptr, unique_identifier};
471 471
472 const auto found = runtime_cache.find(unique_identifier); 472 const auto found = runtime_cache.find(unique_identifier);
473 if (found == runtime_cache.end()) { 473 if (found == runtime_cache.end()) {
@@ -484,18 +484,20 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
484 484
485Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { 485Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
486 auto& memory_manager{system.GPU().MemoryManager()}; 486 auto& memory_manager{system.GPU().MemoryManager()};
487 const auto host_ptr{memory_manager.GetPointer(code_addr)}; 487 const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
488 auto kernel = TryGet(host_ptr); 488
489 auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr;
489 if (kernel) { 490 if (kernel) {
490 return kernel; 491 return kernel;
491 } 492 }
492 493
494 const auto host_ptr{memory_manager.GetPointer(code_addr)};
493 // No kernel found, create a new one 495 // No kernel found, create a new one
494 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; 496 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
495 const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; 497 const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
496 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; 498
497 const ShaderParameters params{system, disk_cache, device, 499 const ShaderParameters params{system, disk_cache, device,
498 cpu_addr, host_ptr, unique_identifier}; 500 *cpu_addr, host_ptr, unique_identifier};
499 501
500 const auto found = runtime_cache.find(unique_identifier); 502 const auto found = runtime_cache.find(unique_identifier);
501 if (found == runtime_cache.end()) { 503 if (found == runtime_cache.end()) {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 4935019fc..c836df5bd 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -65,11 +65,6 @@ public:
65 /// Gets the GL program handle for the shader 65 /// Gets the GL program handle for the shader
66 GLuint GetHandle() const; 66 GLuint GetHandle() const;
67 67
68 /// Returns the guest CPU address of the shader
69 VAddr GetCpuAddr() const override {
70 return cpu_addr;
71 }
72
73 /// Returns the size in bytes of the shader 68 /// Returns the size in bytes of the shader
74 std::size_t GetSizeInBytes() const override { 69 std::size_t GetSizeInBytes() const override {
75 return size_in_bytes; 70 return size_in_bytes;
@@ -90,13 +85,12 @@ public:
90 std::size_t size_in_bytes); 85 std::size_t size_in_bytes);
91 86
92private: 87private:
93 explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, 88 explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
94 std::shared_ptr<VideoCommon::Shader::Registry> registry, 89 std::shared_ptr<VideoCommon::Shader::Registry> registry,
95 ShaderEntries entries, std::shared_ptr<OGLProgram> program); 90 ShaderEntries entries, std::shared_ptr<OGLProgram> program);
96 91
97 std::shared_ptr<VideoCommon::Shader::Registry> registry; 92 std::shared_ptr<VideoCommon::Shader::Registry> registry;
98 ShaderEntries entries; 93 ShaderEntries entries;
99 VAddr cpu_addr = 0;
100 std::size_t size_in_bytes = 0; 94 std::size_t size_in_bytes = 0;
101 std::shared_ptr<OGLProgram> program; 95 std::shared_ptr<OGLProgram> program;
102}; 96};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index c7d24cf14..160ae4340 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -31,11 +31,11 @@ namespace {
31 31
32using Tegra::Engines::ShaderType; 32using Tegra::Engines::ShaderType;
33using Tegra::Shader::Attribute; 33using Tegra::Shader::Attribute;
34using Tegra::Shader::AttributeUse;
35using Tegra::Shader::Header; 34using Tegra::Shader::Header;
36using Tegra::Shader::IpaInterpMode; 35using Tegra::Shader::IpaInterpMode;
37using Tegra::Shader::IpaMode; 36using Tegra::Shader::IpaMode;
38using Tegra::Shader::IpaSampleMode; 37using Tegra::Shader::IpaSampleMode;
38using Tegra::Shader::PixelImap;
39using Tegra::Shader::Register; 39using Tegra::Shader::Register;
40using VideoCommon::Shader::BuildTransformFeedback; 40using VideoCommon::Shader::BuildTransformFeedback;
41using VideoCommon::Shader::Registry; 41using VideoCommon::Shader::Registry;
@@ -702,20 +702,19 @@ private:
702 code.AddNewLine(); 702 code.AddNewLine();
703 } 703 }
704 704
705 std::string GetInputFlags(AttributeUse attribute) { 705 const char* GetInputFlags(PixelImap attribute) {
706 switch (attribute) { 706 switch (attribute) {
707 case AttributeUse::Perspective: 707 case PixelImap::Perspective:
708 // Default, Smooth 708 return "smooth";
709 return {}; 709 case PixelImap::Constant:
710 case AttributeUse::Constant: 710 return "flat";
711 return "flat "; 711 case PixelImap::ScreenLinear:
712 case AttributeUse::ScreenLinear: 712 return "noperspective";
713 return "noperspective "; 713 case PixelImap::Unused:
714 default: 714 break;
715 case AttributeUse::Unused:
716 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute));
717 return {};
718 } 715 }
716 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute));
717 return {};
719 } 718 }
720 719
721 void DeclareInputAttributes() { 720 void DeclareInputAttributes() {
@@ -749,8 +748,8 @@ private:
749 748
750 std::string suffix; 749 std::string suffix;
751 if (stage == ShaderType::Fragment) { 750 if (stage == ShaderType::Fragment) {
752 const auto input_mode{header.ps.GetAttributeUse(location)}; 751 const auto input_mode{header.ps.GetPixelImap(location)};
753 if (skip_unused && input_mode == AttributeUse::Unused) { 752 if (input_mode == PixelImap::Unused) {
754 return; 753 return;
755 } 754 }
756 suffix = GetInputFlags(input_mode); 755 suffix = GetInputFlags(input_mode);
@@ -927,7 +926,7 @@ private:
927 const u32 address{generic_base + index * generic_stride + element * element_stride}; 926 const u32 address{generic_base + index * generic_stride + element * element_stride};
928 927
929 const bool declared = stage != ShaderType::Fragment || 928 const bool declared = stage != ShaderType::Fragment ||
930 header.ps.GetAttributeUse(index) != AttributeUse::Unused; 929 header.ps.GetPixelImap(index) != PixelImap::Unused;
931 const std::string value = 930 const std::string value =
932 declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f"; 931 declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
933 code.AddLine("case 0x{:X}U: return {};", address, value); 932 code.AddLine("case 0x{:X}U: return {};", address, value);
@@ -1142,8 +1141,7 @@ private:
1142 GetSwizzle(element)), 1141 GetSwizzle(element)),
1143 Type::Float}; 1142 Type::Float};
1144 case ShaderType::Fragment: 1143 case ShaderType::Fragment:
1145 return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)), 1144 return {"gl_FragCoord"s + GetSwizzle(element), Type::Float};
1146 Type::Float};
1147 default: 1145 default:
1148 UNREACHABLE(); 1146 UNREACHABLE();
1149 } 1147 }
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index f424e3000..36590a6d0 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -24,7 +24,6 @@ using Tegra::Texture::SwizzleSource;
24using VideoCore::MortonSwizzleMode; 24using VideoCore::MortonSwizzleMode;
25 25
26using VideoCore::Surface::PixelFormat; 26using VideoCore::Surface::PixelFormat;
27using VideoCore::Surface::SurfaceCompression;
28using VideoCore::Surface::SurfaceTarget; 27using VideoCore::Surface::SurfaceTarget;
29using VideoCore::Surface::SurfaceType; 28using VideoCore::Surface::SurfaceType;
30 29
@@ -37,102 +36,100 @@ namespace {
37 36
38struct FormatTuple { 37struct FormatTuple {
39 GLint internal_format; 38 GLint internal_format;
40 GLenum format; 39 GLenum format = GL_NONE;
41 GLenum type; 40 GLenum type = GL_NONE;
42 bool compressed;
43}; 41};
44 42
45constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ 43constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
46 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U 44 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // ABGR8U
47 {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false}, // ABGR8S 45 {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // ABGR8S
48 {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI 46 {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // ABGR8UI
49 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U 47 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5U
50 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U 48 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10U
51 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5U 49 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5U
52 {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8U 50 {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8U
53 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI 51 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8UI
54 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F 52 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F
55 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U 53 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // RGBA16U
56 {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S 54 {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // RGBA16S
57 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI 55 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // RGBA16UI
58 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F 56 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // R11FG11FB10F
59 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI 57 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // RGBA32UI
60 {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1 58 {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // DXT1
61 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23 59 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // DXT23
62 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45 60 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // DXT45
63 {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1 61 {GL_COMPRESSED_RED_RGTC1}, // DXN1
64 {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN2UNORM 62 {GL_COMPRESSED_RG_RGTC2}, // DXN2UNORM
65 {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, true}, // DXN2SNORM 63 {GL_COMPRESSED_SIGNED_RG_RGTC2}, // DXN2SNORM
66 {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U 64 {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7U
67 {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_UF16 65 {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UF16
68 {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_SF16 66 {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SF16
69 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4 67 {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4
70 {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8 68 {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8
71 {GL_RGBA32F, GL_RGBA, GL_FLOAT, false}, // RGBA32F 69 {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F
72 {GL_RG32F, GL_RG, GL_FLOAT, false}, // RG32F 70 {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F
73 {GL_R32F, GL_RED, GL_FLOAT, false}, // R32F 71 {GL_R32F, GL_RED, GL_FLOAT}, // R32F
74 {GL_R16F, GL_RED, GL_HALF_FLOAT, false}, // R16F 72 {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F
75 {GL_R16, GL_RED, GL_UNSIGNED_SHORT, false}, // R16U 73 {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16U
76 {GL_R16_SNORM, GL_RED, GL_SHORT, false}, // R16S 74 {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16S
77 {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, false}, // R16UI 75 {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16UI
78 {GL_R16I, GL_RED_INTEGER, GL_SHORT, false}, // R16I 76 {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I
79 {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, false}, // RG16 77 {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // RG16
80 {GL_RG16F, GL_RG, GL_HALF_FLOAT, false}, // RG16F 78 {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F
81 {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, false}, // RG16UI 79 {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // RG16UI
82 {GL_RG16I, GL_RG_INTEGER, GL_SHORT, false}, // RG16I 80 {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // RG16I
83 {GL_RG16_SNORM, GL_RG, GL_SHORT, false}, // RG16S 81 {GL_RG16_SNORM, GL_RG, GL_SHORT}, // RG16S
84 {GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F 82 {GL_RGB32F, GL_RGB, GL_FLOAT}, // RGB32F
85 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB 83 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // RGBA8_SRGB
86 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U 84 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // RG8U
87 {GL_RG8_SNORM, GL_RG, GL_BYTE, false}, // RG8S 85 {GL_RG8_SNORM, GL_RG, GL_BYTE}, // RG8S
88 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI 86 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // RG32UI
89 {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F 87 {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // RGBX16F
90 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI 88 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32UI
91 {GL_R32I, GL_RED_INTEGER, GL_INT, false}, // R32I 89 {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I
92 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8 90 {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8
93 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5 91 {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5
94 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4 92 {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4
95 {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8 93 {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8
96 // Compressed sRGB formats 94 // Compressed sRGB formats
97 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1_SRGB 95 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // DXT1_SRGB
98 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23_SRGB 96 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // DXT23_SRGB
99 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45_SRGB 97 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // DXT45_SRGB
100 {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U_SRGB 98 {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7U_SRGB
101 {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV, false}, // R4G4B4A4U 99 {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // R4G4B4A4U
102 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4_SRGB 100 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
103 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8_SRGB 101 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
104 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5_SRGB 102 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
105 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4_SRGB 103 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
106 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5 104 {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5
107 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5_SRGB 105 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
108 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8 106 {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8
109 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8_SRGB 107 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
110 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6 108 {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6
111 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6_SRGB 109 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
112 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10 110 {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10
113 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10_SRGB 111 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
114 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12 112 {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12
115 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12_SRGB 113 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
116 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6 114 {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6
117 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6_SRGB 115 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
118 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5 116 {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5
119 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5_SRGB 117 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
120 {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV, false}, // E5B9G9R9F 118 {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9F
121 119
122 // Depth formats 120 // Depth formats
123 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, false}, // Z32F 121 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // Z32F
124 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, false}, // Z16 122 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // Z16
125 123
126 // DepthStencil formats 124 // DepthStencil formats
127 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // Z24S8 125 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // Z24S8
128 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // S8Z24 126 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8Z24
129 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, false}, // Z32FS8 127 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // Z32FS8
130}}; 128}};
131 129
132const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { 130const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
133 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); 131 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
134 const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]}; 132 return tex_format_tuples[static_cast<std::size_t>(pixel_format)];
135 return format;
136} 133}
137 134
138GLenum GetTextureTarget(const SurfaceTarget& target) { 135GLenum GetTextureTarget(const SurfaceTarget& target) {
@@ -242,13 +239,20 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte
242 239
243} // Anonymous namespace 240} // Anonymous namespace
244 241
245CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) 242CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
246 : VideoCommon::SurfaceBase<View>(gpu_addr, params) { 243 bool is_astc_supported)
247 const auto& tuple{GetFormatTuple(params.pixel_format)}; 244 : VideoCommon::SurfaceBase<View>(gpu_addr, params, is_astc_supported) {
248 internal_format = tuple.internal_format; 245 if (is_converted) {
249 format = tuple.format; 246 internal_format = params.srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8;
250 type = tuple.type; 247 format = GL_RGBA;
251 is_compressed = tuple.compressed; 248 type = GL_UNSIGNED_BYTE;
249 } else {
250 const auto& tuple{GetFormatTuple(params.pixel_format)};
251 internal_format = tuple.internal_format;
252 format = tuple.format;
253 type = tuple.type;
254 is_compressed = params.IsCompressed();
255 }
252 target = GetTextureTarget(params.target); 256 target = GetTextureTarget(params.target);
253 texture = CreateTexture(params, target, internal_format, texture_buffer); 257 texture = CreateTexture(params, target, internal_format, texture_buffer);
254 DecorateSurfaceName(); 258 DecorateSurfaceName();
@@ -264,7 +268,7 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
264 268
265 if (params.IsBuffer()) { 269 if (params.IsBuffer()) {
266 glGetNamedBufferSubData(texture_buffer.handle, 0, 270 glGetNamedBufferSubData(texture_buffer.handle, 0,
267 static_cast<GLsizeiptr>(params.GetHostSizeInBytes()), 271 static_cast<GLsizeiptr>(params.GetHostSizeInBytes(false)),
268 staging_buffer.data()); 272 staging_buffer.data());
269 return; 273 return;
270 } 274 }
@@ -272,9 +276,10 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
272 SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); 276 SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });
273 277
274 for (u32 level = 0; level < params.emulated_levels; ++level) { 278 for (u32 level = 0; level < params.emulated_levels; ++level) {
275 glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); 279 glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted)));
276 glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); 280 glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
277 const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); 281 const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
282
278 u8* const mip_data = staging_buffer.data() + mip_offset; 283 u8* const mip_data = staging_buffer.data() + mip_offset;
279 const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level)); 284 const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));
280 if (is_compressed) { 285 if (is_compressed) {
@@ -294,14 +299,10 @@ void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
294} 299}
295 300
296void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) { 301void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) {
297 glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); 302 glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted)));
298 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); 303 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
299 304
300 auto compression_type = params.GetCompressionType(); 305 const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
301
302 const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
303 ? params.GetConvertedMipmapOffset(level)
304 : params.GetHostMipmapLevelOffset(level);
305 const u8* buffer{staging_buffer.data() + mip_offset}; 306 const u8* buffer{staging_buffer.data() + mip_offset};
306 if (is_compressed) { 307 if (is_compressed) {
307 const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))}; 308 const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))};
@@ -482,7 +483,7 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
482TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, 483TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
483 VideoCore::RasterizerInterface& rasterizer, 484 VideoCore::RasterizerInterface& rasterizer,
484 const Device& device, StateTracker& state_tracker) 485 const Device& device, StateTracker& state_tracker)
485 : TextureCacheBase{system, rasterizer}, state_tracker{state_tracker} { 486 : TextureCacheBase{system, rasterizer, device.HasASTC()}, state_tracker{state_tracker} {
486 src_framebuffer.Create(); 487 src_framebuffer.Create();
487 dst_framebuffer.Create(); 488 dst_framebuffer.Create();
488} 489}
@@ -490,7 +491,7 @@ TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
490TextureCacheOpenGL::~TextureCacheOpenGL() = default; 491TextureCacheOpenGL::~TextureCacheOpenGL() = default;
491 492
492Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { 493Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
493 return std::make_shared<CachedSurface>(gpu_addr, params); 494 return std::make_shared<CachedSurface>(gpu_addr, params, is_astc_supported);
494} 495}
495 496
496void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface, 497void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface,
@@ -596,7 +597,7 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface)
596 597
597 glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); 598 glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
598 599
599 if (source_format.compressed) { 600 if (src_surface->IsCompressed()) {
600 glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size), 601 glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size),
601 nullptr); 602 nullptr);
602 } else { 603 } else {
@@ -610,7 +611,7 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface)
610 const GLsizei width = static_cast<GLsizei>(dst_params.width); 611 const GLsizei width = static_cast<GLsizei>(dst_params.width);
611 const GLsizei height = static_cast<GLsizei>(dst_params.height); 612 const GLsizei height = static_cast<GLsizei>(dst_params.height);
612 const GLsizei depth = static_cast<GLsizei>(dst_params.depth); 613 const GLsizei depth = static_cast<GLsizei>(dst_params.depth);
613 if (dest_format.compressed) { 614 if (dst_surface->IsCompressed()) {
614 LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!"); 615 LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");
615 UNREACHABLE(); 616 UNREACHABLE();
616 } else { 617 } else {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 6658c6ffd..02d9981a1 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -37,7 +37,7 @@ class CachedSurface final : public VideoCommon::SurfaceBase<View> {
37 friend CachedSurfaceView; 37 friend CachedSurfaceView;
38 38
39public: 39public:
40 explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params); 40 explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params, bool is_astc_supported);
41 ~CachedSurface(); 41 ~CachedSurface();
42 42
43 void UploadTexture(const std::vector<u8>& staging_buffer) override; 43 void UploadTexture(const std::vector<u8>& staging_buffer) override;
@@ -51,6 +51,10 @@ public:
51 return texture.handle; 51 return texture.handle;
52 } 52 }
53 53
54 bool IsCompressed() const {
55 return is_compressed;
56 }
57
54protected: 58protected:
55 void DecorateSurfaceName() override; 59 void DecorateSurfaceName() override;
56 60
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 1ba544943..326d74f29 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -42,8 +42,8 @@ auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
42} // Anonymous namespace 42} // Anonymous namespace
43 43
44CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, 44CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
45 CacheAddr cache_addr, std::size_t size) 45 VAddr cpu_addr, std::size_t size)
46 : VideoCommon::BufferBlock{cache_addr, size} { 46 : VideoCommon::BufferBlock{cpu_addr, size} {
47 const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size), 47 const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size),
48 BufferUsage | vk::BufferUsageFlagBits::eTransferSrc | 48 BufferUsage | vk::BufferUsageFlagBits::eTransferSrc |
49 vk::BufferUsageFlagBits::eTransferDst, 49 vk::BufferUsageFlagBits::eTransferDst,
@@ -68,8 +68,8 @@ VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::S
68 68
69VKBufferCache::~VKBufferCache() = default; 69VKBufferCache::~VKBufferCache() = default;
70 70
71Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { 71Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
72 return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size); 72 return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
73} 73}
74 74
75const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) { 75const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 3f38eed0c..508214618 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -30,7 +30,7 @@ class VKScheduler;
30class CachedBufferBlock final : public VideoCommon::BufferBlock { 30class CachedBufferBlock final : public VideoCommon::BufferBlock {
31public: 31public:
32 explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, 32 explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
33 CacheAddr cache_addr, std::size_t size); 33 VAddr cpu_addr, std::size_t size);
34 ~CachedBufferBlock(); 34 ~CachedBufferBlock();
35 35
36 const vk::Buffer* GetHandle() const { 36 const vk::Buffer* GetHandle() const {
@@ -55,7 +55,7 @@ public:
55protected: 55protected:
56 void WriteBarrier() override {} 56 void WriteBarrier() override {}
57 57
58 Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; 58 Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
59 59
60 const vk::Buffer* ToHandle(const Buffer& buffer) override; 60 const vk::Buffer* ToHandle(const Buffer& buffer) override;
61 61
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 28d2fbc4f..7aafb5e59 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -237,18 +237,21 @@ void VKDevice::ReportLoss() const {
237 237
238bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, 238bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
239 const vk::DispatchLoaderDynamic& dldi) const { 239 const vk::DispatchLoaderDynamic& dldi) const {
240 // Disable for now to avoid converting ASTC twice.
241 return false;
242 static constexpr std::array astc_formats = { 240 static constexpr std::array astc_formats = {
243 vk::Format::eAstc4x4SrgbBlock, vk::Format::eAstc8x8SrgbBlock, 241 vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock,
244 vk::Format::eAstc8x5SrgbBlock, vk::Format::eAstc5x4SrgbBlock, 242 vk::Format::eAstc5x4UnormBlock, vk::Format::eAstc5x4SrgbBlock,
245 vk::Format::eAstc5x5UnormBlock, vk::Format::eAstc5x5SrgbBlock, 243 vk::Format::eAstc5x5UnormBlock, vk::Format::eAstc5x5SrgbBlock,
246 vk::Format::eAstc10x8UnormBlock, vk::Format::eAstc10x8SrgbBlock, 244 vk::Format::eAstc6x5UnormBlock, vk::Format::eAstc6x5SrgbBlock,
247 vk::Format::eAstc6x6UnormBlock, vk::Format::eAstc6x6SrgbBlock, 245 vk::Format::eAstc6x6UnormBlock, vk::Format::eAstc6x6SrgbBlock,
248 vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock, 246 vk::Format::eAstc8x5UnormBlock, vk::Format::eAstc8x5SrgbBlock,
249 vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock,
250 vk::Format::eAstc8x6UnormBlock, vk::Format::eAstc8x6SrgbBlock, 247 vk::Format::eAstc8x6UnormBlock, vk::Format::eAstc8x6SrgbBlock,
251 vk::Format::eAstc6x5UnormBlock, vk::Format::eAstc6x5SrgbBlock}; 248 vk::Format::eAstc8x8UnormBlock, vk::Format::eAstc8x8SrgbBlock,
249 vk::Format::eAstc10x5UnormBlock, vk::Format::eAstc10x5SrgbBlock,
250 vk::Format::eAstc10x6UnormBlock, vk::Format::eAstc10x6SrgbBlock,
251 vk::Format::eAstc10x8UnormBlock, vk::Format::eAstc10x8SrgbBlock,
252 vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock,
253 vk::Format::eAstc12x10UnormBlock, vk::Format::eAstc12x10SrgbBlock,
254 vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock};
252 if (!features.textureCompressionASTC_LDR) { 255 if (!features.textureCompressionASTC_LDR) {
253 return false; 256 return false;
254 } 257 }
@@ -572,24 +575,34 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti
572 vk::Format::eBc2SrgbBlock, 575 vk::Format::eBc2SrgbBlock,
573 vk::Format::eBc3SrgbBlock, 576 vk::Format::eBc3SrgbBlock,
574 vk::Format::eBc7SrgbBlock, 577 vk::Format::eBc7SrgbBlock,
578 vk::Format::eAstc4x4UnormBlock,
575 vk::Format::eAstc4x4SrgbBlock, 579 vk::Format::eAstc4x4SrgbBlock,
576 vk::Format::eAstc8x8SrgbBlock, 580 vk::Format::eAstc5x4UnormBlock,
577 vk::Format::eAstc8x5SrgbBlock,
578 vk::Format::eAstc5x4SrgbBlock, 581 vk::Format::eAstc5x4SrgbBlock,
579 vk::Format::eAstc5x5UnormBlock, 582 vk::Format::eAstc5x5UnormBlock,
580 vk::Format::eAstc5x5SrgbBlock, 583 vk::Format::eAstc5x5SrgbBlock,
581 vk::Format::eAstc10x8UnormBlock, 584 vk::Format::eAstc6x5UnormBlock,
582 vk::Format::eAstc10x8SrgbBlock, 585 vk::Format::eAstc6x5SrgbBlock,
583 vk::Format::eAstc6x6UnormBlock, 586 vk::Format::eAstc6x6UnormBlock,
584 vk::Format::eAstc6x6SrgbBlock, 587 vk::Format::eAstc6x6SrgbBlock,
588 vk::Format::eAstc8x5UnormBlock,
589 vk::Format::eAstc8x5SrgbBlock,
590 vk::Format::eAstc8x6UnormBlock,
591 vk::Format::eAstc8x6SrgbBlock,
592 vk::Format::eAstc8x8UnormBlock,
593 vk::Format::eAstc8x8SrgbBlock,
594 vk::Format::eAstc10x5UnormBlock,
595 vk::Format::eAstc10x5SrgbBlock,
596 vk::Format::eAstc10x6UnormBlock,
597 vk::Format::eAstc10x6SrgbBlock,
598 vk::Format::eAstc10x8UnormBlock,
599 vk::Format::eAstc10x8SrgbBlock,
585 vk::Format::eAstc10x10UnormBlock, 600 vk::Format::eAstc10x10UnormBlock,
586 vk::Format::eAstc10x10SrgbBlock, 601 vk::Format::eAstc10x10SrgbBlock,
602 vk::Format::eAstc12x10UnormBlock,
603 vk::Format::eAstc12x10SrgbBlock,
587 vk::Format::eAstc12x12UnormBlock, 604 vk::Format::eAstc12x12UnormBlock,
588 vk::Format::eAstc12x12SrgbBlock, 605 vk::Format::eAstc12x12SrgbBlock,
589 vk::Format::eAstc8x6UnormBlock,
590 vk::Format::eAstc8x6SrgbBlock,
591 vk::Format::eAstc6x5UnormBlock,
592 vk::Format::eAstc6x5SrgbBlock,
593 vk::Format::eE5B9G9R9UfloatPack32}; 606 vk::Format::eE5B9G9R9UfloatPack32};
594 std::unordered_map<vk::Format, vk::FormatProperties> format_properties; 607 std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
595 for (const auto format : formats) { 608 for (const auto format : formats) {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 557b9d662..c2a426aeb 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -158,11 +158,11 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
158} // Anonymous namespace 158} // Anonymous namespace
159 159
160CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, 160CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
161 GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, 161 GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
162 ProgramCode program_code, u32 main_offset) 162 u32 main_offset)
163 : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, 163 : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
164 program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)}, 164 registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
165 shader_ir{this->program_code, main_offset, compiler_settings, registry}, 165 compiler_settings, registry},
166 entries{GenerateShaderEntries(shader_ir)} {} 166 entries{GenerateShaderEntries(shader_ir)} {}
167 167
168CachedShader::~CachedShader() = default; 168CachedShader::~CachedShader() = default;
@@ -201,19 +201,19 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
201 201
202 auto& memory_manager{system.GPU().MemoryManager()}; 202 auto& memory_manager{system.GPU().MemoryManager()};
203 const GPUVAddr program_addr{GetShaderAddress(system, program)}; 203 const GPUVAddr program_addr{GetShaderAddress(system, program)};
204 const auto host_ptr{memory_manager.GetPointer(program_addr)}; 204 const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
205 auto shader = TryGet(host_ptr); 205 ASSERT(cpu_addr);
206 auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
206 if (!shader) { 207 if (!shader) {
208 const auto host_ptr{memory_manager.GetPointer(program_addr)};
209
207 // No shader found - create a new one 210 // No shader found - create a new one
208 constexpr u32 stage_offset = 10; 211 constexpr u32 stage_offset = 10;
209 const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); 212 const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
210 auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false); 213 auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
211 214
212 const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
213 ASSERT(cpu_addr);
214
215 shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, 215 shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
216 host_ptr, std::move(code), stage_offset); 216 std::move(code), stage_offset);
217 Register(shader); 217 Register(shader);
218 } 218 }
219 shaders[index] = std::move(shader); 219 shaders[index] = std::move(shader);
@@ -253,18 +253,19 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
253 253
254 auto& memory_manager = system.GPU().MemoryManager(); 254 auto& memory_manager = system.GPU().MemoryManager();
255 const auto program_addr = key.shader; 255 const auto program_addr = key.shader;
256 const auto host_ptr = memory_manager.GetPointer(program_addr);
257 256
258 auto shader = TryGet(host_ptr); 257 const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
258 ASSERT(cpu_addr);
259
260 auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
259 if (!shader) { 261 if (!shader) {
260 // No shader found - create a new one 262 // No shader found - create a new one
261 const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); 263 const auto host_ptr = memory_manager.GetPointer(program_addr);
262 ASSERT(cpu_addr);
263 264
264 auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true); 265 auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
265 constexpr u32 kernel_main_offset = 0; 266 constexpr u32 kernel_main_offset = 0;
266 shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, 267 shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
267 program_addr, *cpu_addr, host_ptr, std::move(code), 268 program_addr, *cpu_addr, std::move(code),
268 kernel_main_offset); 269 kernel_main_offset);
269 Register(shader); 270 Register(shader);
270 } 271 }
@@ -345,8 +346,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
345 } 346 }
346 347
347 const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); 348 const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
348 const auto host_ptr = memory_manager.GetPointer(gpu_addr); 349 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
349 const auto shader = TryGet(host_ptr); 350 ASSERT(cpu_addr);
351 const auto shader = TryGet(*cpu_addr);
350 ASSERT(shader); 352 ASSERT(shader);
351 353
352 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 354 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index c4c112290..27c01732f 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -113,17 +113,13 @@ namespace Vulkan {
113class CachedShader final : public RasterizerCacheObject { 113class CachedShader final : public RasterizerCacheObject {
114public: 114public:
115 explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, 115 explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
116 VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset); 116 VAddr cpu_addr, ProgramCode program_code, u32 main_offset);
117 ~CachedShader(); 117 ~CachedShader();
118 118
119 GPUVAddr GetGpuAddr() const { 119 GPUVAddr GetGpuAddr() const {
120 return gpu_addr; 120 return gpu_addr;
121 } 121 }
122 122
123 VAddr GetCpuAddr() const override {
124 return cpu_addr;
125 }
126
127 std::size_t GetSizeInBytes() const override { 123 std::size_t GetSizeInBytes() const override {
128 return program_code.size() * sizeof(u64); 124 return program_code.size() * sizeof(u64);
129 } 125 }
@@ -149,7 +145,6 @@ private:
149 Tegra::Engines::ShaderType stage); 145 Tegra::Engines::ShaderType stage);
150 146
151 GPUVAddr gpu_addr{}; 147 GPUVAddr gpu_addr{};
152 VAddr cpu_addr{};
153 ProgramCode program_code; 148 ProgramCode program_code;
154 VideoCommon::Shader::Registry registry; 149 VideoCommon::Shader::Registry registry;
155 VideoCommon::Shader::ShaderIR shader_ir; 150 VideoCommon::Shader::ShaderIR shader_ir;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 58c69b786..0a2ea4fd4 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -495,20 +495,26 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
495 495
496void RasterizerVulkan::FlushAll() {} 496void RasterizerVulkan::FlushAll() {}
497 497
498void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { 498void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
499 if (addr == 0 || size == 0) {
500 return;
501 }
499 texture_cache.FlushRegion(addr, size); 502 texture_cache.FlushRegion(addr, size);
500 buffer_cache.FlushRegion(addr, size); 503 buffer_cache.FlushRegion(addr, size);
501 query_cache.FlushRegion(addr, size); 504 query_cache.FlushRegion(addr, size);
502} 505}
503 506
504void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { 507void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
508 if (addr == 0 || size == 0) {
509 return;
510 }
505 texture_cache.InvalidateRegion(addr, size); 511 texture_cache.InvalidateRegion(addr, size);
506 pipeline_cache.InvalidateRegion(addr, size); 512 pipeline_cache.InvalidateRegion(addr, size);
507 buffer_cache.InvalidateRegion(addr, size); 513 buffer_cache.InvalidateRegion(addr, size);
508 query_cache.InvalidateRegion(addr, size); 514 query_cache.InvalidateRegion(addr, size);
509} 515}
510 516
511void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { 517void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
512 FlushRegion(addr, size); 518 FlushRegion(addr, size);
513 InvalidateRegion(addr, size); 519 InvalidateRegion(addr, size);
514} 520}
@@ -540,8 +546,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
540 return false; 546 return false;
541 } 547 }
542 548
543 const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)}; 549 const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
544 const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)};
545 if (!surface) { 550 if (!surface) {
546 return false; 551 return false;
547 } 552 }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 3185868e9..f642dde76 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -118,9 +118,9 @@ public:
118 void ResetCounter(VideoCore::QueryType type) override; 118 void ResetCounter(VideoCore::QueryType type) override;
119 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 119 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
120 void FlushAll() override; 120 void FlushAll() override;
121 void FlushRegion(CacheAddr addr, u64 size) override; 121 void FlushRegion(VAddr addr, u64 size) override;
122 void InvalidateRegion(CacheAddr addr, u64 size) override; 122 void InvalidateRegion(VAddr addr, u64 size) override;
123 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 123 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
124 void FlushCommands() override; 124 void FlushCommands() override;
125 void TickFrame() override; 125 void TickFrame() override;
126 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 126 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index d67f08cf9..b9f9e2714 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -35,7 +35,7 @@ namespace {
35using Sirit::Id; 35using Sirit::Id;
36using Tegra::Engines::ShaderType; 36using Tegra::Engines::ShaderType;
37using Tegra::Shader::Attribute; 37using Tegra::Shader::Attribute;
38using Tegra::Shader::AttributeUse; 38using Tegra::Shader::PixelImap;
39using Tegra::Shader::Register; 39using Tegra::Shader::Register;
40using namespace VideoCommon::Shader; 40using namespace VideoCommon::Shader;
41 41
@@ -752,16 +752,16 @@ private:
752 if (stage != ShaderType::Fragment) { 752 if (stage != ShaderType::Fragment) {
753 continue; 753 continue;
754 } 754 }
755 switch (header.ps.GetAttributeUse(location)) { 755 switch (header.ps.GetPixelImap(location)) {
756 case AttributeUse::Constant: 756 case PixelImap::Constant:
757 Decorate(id, spv::Decoration::Flat); 757 Decorate(id, spv::Decoration::Flat);
758 break; 758 break;
759 case AttributeUse::ScreenLinear: 759 case PixelImap::Perspective:
760 Decorate(id, spv::Decoration::NoPerspective);
761 break;
762 case AttributeUse::Perspective:
763 // Default 760 // Default
764 break; 761 break;
762 case PixelImap::ScreenLinear:
763 Decorate(id, spv::Decoration::NoPerspective);
764 break;
765 default: 765 default:
766 UNREACHABLE_MSG("Unused attribute being fetched"); 766 UNREACHABLE_MSG("Unused attribute being fetched");
767 } 767 }
@@ -1145,9 +1145,6 @@ private:
1145 switch (attribute) { 1145 switch (attribute) {
1146 case Attribute::Index::Position: { 1146 case Attribute::Index::Position: {
1147 if (stage == ShaderType::Fragment) { 1147 if (stage == ShaderType::Fragment) {
1148 if (element == 3) {
1149 return {Constant(t_float, 1.0f), Type::Float};
1150 }
1151 return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)), 1148 return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)),
1152 Type::Float}; 1149 Type::Float};
1153 } 1150 }
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 26175921b..5b9b39670 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -35,7 +35,6 @@ using VideoCore::MortonSwizzleMode;
35 35
36using Tegra::Texture::SwizzleSource; 36using Tegra::Texture::SwizzleSource;
37using VideoCore::Surface::PixelFormat; 37using VideoCore::Surface::PixelFormat;
38using VideoCore::Surface::SurfaceCompression;
39using VideoCore::Surface::SurfaceTarget; 38using VideoCore::Surface::SurfaceTarget;
40 39
41namespace { 40namespace {
@@ -96,9 +95,10 @@ vk::ImageViewType GetImageViewType(SurfaceTarget target) {
96 return {}; 95 return {};
97} 96}
98 97
99UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) { 98UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params,
99 std::size_t host_memory_size) {
100 // TODO(Rodrigo): Move texture buffer creation to the buffer cache 100 // TODO(Rodrigo): Move texture buffer creation to the buffer cache
101 const vk::BufferCreateInfo buffer_ci({}, params.GetHostSizeInBytes(), 101 const vk::BufferCreateInfo buffer_ci({}, host_memory_size,
102 vk::BufferUsageFlagBits::eUniformTexelBuffer | 102 vk::BufferUsageFlagBits::eUniformTexelBuffer |
103 vk::BufferUsageFlagBits::eTransferSrc | 103 vk::BufferUsageFlagBits::eTransferSrc |
104 vk::BufferUsageFlagBits::eTransferDst, 104 vk::BufferUsageFlagBits::eTransferDst,
@@ -110,12 +110,13 @@ UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) {
110 110
111vk::BufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, 111vk::BufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
112 const SurfaceParams& params, 112 const SurfaceParams& params,
113 vk::Buffer buffer) { 113 vk::Buffer buffer,
114 std::size_t host_memory_size) {
114 ASSERT(params.IsBuffer()); 115 ASSERT(params.IsBuffer());
115 116
116 const auto format = 117 const auto format =
117 MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format; 118 MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format;
118 return vk::BufferViewCreateInfo({}, buffer, format, 0, params.GetHostSizeInBytes()); 119 return vk::BufferViewCreateInfo({}, buffer, format, 0, host_memory_size);
119} 120}
120 121
121vk::ImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { 122vk::ImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) {
@@ -169,14 +170,15 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
169 VKResourceManager& resource_manager, VKMemoryManager& memory_manager, 170 VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
170 VKScheduler& scheduler, VKStagingBufferPool& staging_pool, 171 VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
171 GPUVAddr gpu_addr, const SurfaceParams& params) 172 GPUVAddr gpu_addr, const SurfaceParams& params)
172 : SurfaceBase<View>{gpu_addr, params}, system{system}, device{device}, 173 : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, system{system},
173 resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler}, 174 device{device}, resource_manager{resource_manager},
174 staging_pool{staging_pool} { 175 memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {
175 if (params.IsBuffer()) { 176 if (params.IsBuffer()) {
176 buffer = CreateBuffer(device, params); 177 buffer = CreateBuffer(device, params, host_memory_size);
177 commit = memory_manager.Commit(*buffer, false); 178 commit = memory_manager.Commit(*buffer, false);
178 179
179 const auto buffer_view_ci = GenerateBufferViewCreateInfo(device, params, *buffer); 180 const auto buffer_view_ci =
181 GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size);
180 format = buffer_view_ci.format; 182 format = buffer_view_ci.format;
181 183
182 const auto dev = device.GetLogical(); 184 const auto dev = device.GetLogical();
@@ -255,7 +257,7 @@ void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
255 std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); 257 std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);
256 258
257 scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer, 259 scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer,
258 size = params.GetHostSizeInBytes()](auto cmdbuf, auto& dld) { 260 size = host_memory_size](auto cmdbuf, auto& dld) {
259 const vk::BufferCopy copy(0, 0, size); 261 const vk::BufferCopy copy(0, 0, size);
260 cmdbuf.copyBuffer(src_buffer, dst_buffer, {copy}, dld); 262 cmdbuf.copyBuffer(src_buffer, dst_buffer, {copy}, dld);
261 263
@@ -299,10 +301,7 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) {
299 301
300vk::BufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { 302vk::BufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const {
301 const u32 vk_depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1; 303 const u32 vk_depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1;
302 const auto compression_type = params.GetCompressionType(); 304 const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
303 const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
304 ? params.GetConvertedMipmapOffset(level)
305 : params.GetHostMipmapLevelOffset(level);
306 305
307 return vk::BufferImageCopy( 306 return vk::BufferImageCopy(
308 mip_offset, 0, 0, 307 mip_offset, 0, 0,
@@ -390,8 +389,9 @@ VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterf
390 const VKDevice& device, VKResourceManager& resource_manager, 389 const VKDevice& device, VKResourceManager& resource_manager,
391 VKMemoryManager& memory_manager, VKScheduler& scheduler, 390 VKMemoryManager& memory_manager, VKScheduler& scheduler,
392 VKStagingBufferPool& staging_pool) 391 VKStagingBufferPool& staging_pool)
393 : TextureCache(system, rasterizer), device{device}, resource_manager{resource_manager}, 392 : TextureCache(system, rasterizer, device.IsOptimalAstcSupported()), device{device},
394 memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {} 393 resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
394 staging_pool{staging_pool} {}
395 395
396VKTextureCache::~VKTextureCache() = default; 396VKTextureCache::~VKTextureCache() = default;
397 397
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index d2fe4ec5d..0dd7a1196 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -13,13 +13,247 @@
13#include "video_core/engines/shader_bytecode.h" 13#include "video_core/engines/shader_bytecode.h"
14#include "video_core/shader/node_helper.h" 14#include "video_core/shader/node_helper.h"
15#include "video_core/shader/shader_ir.h" 15#include "video_core/shader/shader_ir.h"
16#include "video_core/textures/texture.h"
16 17
17namespace VideoCommon::Shader { 18namespace VideoCommon::Shader {
18 19
19using Tegra::Shader::Instruction; 20using Tegra::Shader::Instruction;
20using Tegra::Shader::OpCode; 21using Tegra::Shader::OpCode;
22using Tegra::Shader::PredCondition;
23using Tegra::Shader::StoreType;
24using Tegra::Texture::ComponentType;
25using Tegra::Texture::TextureFormat;
26using Tegra::Texture::TICEntry;
21 27
22namespace { 28namespace {
29
30ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
31 std::size_t component) {
32 const TextureFormat format{descriptor.format};
33 switch (format) {
34 case TextureFormat::R16_G16_B16_A16:
35 case TextureFormat::R32_G32_B32_A32:
36 case TextureFormat::R32_G32_B32:
37 case TextureFormat::R32_G32:
38 case TextureFormat::R16_G16:
39 case TextureFormat::R32:
40 case TextureFormat::R16:
41 case TextureFormat::R8:
42 case TextureFormat::R1:
43 if (component == 0) {
44 return descriptor.r_type;
45 }
46 if (component == 1) {
47 return descriptor.g_type;
48 }
49 if (component == 2) {
50 return descriptor.b_type;
51 }
52 if (component == 3) {
53 return descriptor.a_type;
54 }
55 break;
56 case TextureFormat::A8R8G8B8:
57 if (component == 0) {
58 return descriptor.a_type;
59 }
60 if (component == 1) {
61 return descriptor.r_type;
62 }
63 if (component == 2) {
64 return descriptor.g_type;
65 }
66 if (component == 3) {
67 return descriptor.b_type;
68 }
69 break;
70 case TextureFormat::A2B10G10R10:
71 case TextureFormat::A4B4G4R4:
72 case TextureFormat::A5B5G5R1:
73 case TextureFormat::A1B5G5R5:
74 if (component == 0) {
75 return descriptor.a_type;
76 }
77 if (component == 1) {
78 return descriptor.b_type;
79 }
80 if (component == 2) {
81 return descriptor.g_type;
82 }
83 if (component == 3) {
84 return descriptor.r_type;
85 }
86 break;
87 case TextureFormat::R32_B24G8:
88 if (component == 0) {
89 return descriptor.r_type;
90 }
91 if (component == 1) {
92 return descriptor.b_type;
93 }
94 if (component == 2) {
95 return descriptor.g_type;
96 }
97 break;
98 case TextureFormat::B5G6R5:
99 case TextureFormat::B6G5R5:
100 if (component == 0) {
101 return descriptor.b_type;
102 }
103 if (component == 1) {
104 return descriptor.g_type;
105 }
106 if (component == 2) {
107 return descriptor.r_type;
108 }
109 break;
110 case TextureFormat::G8R24:
111 case TextureFormat::G24R8:
112 case TextureFormat::G8R8:
113 case TextureFormat::G4R4:
114 if (component == 0) {
115 return descriptor.g_type;
116 }
117 if (component == 1) {
118 return descriptor.r_type;
119 }
120 break;
121 }
122 UNIMPLEMENTED_MSG("texture format not implement={}", format);
123 return ComponentType::FLOAT;
124}
125
126bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
127 constexpr u8 R = 0b0001;
128 constexpr u8 G = 0b0010;
129 constexpr u8 B = 0b0100;
130 constexpr u8 A = 0b1000;
131 constexpr std::array<u8, 16> mask = {
132 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B),
133 (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
134 return std::bitset<4>{mask.at(component_mask)}.test(component);
135}
136
137u32 GetComponentSize(TextureFormat format, std::size_t component) {
138 switch (format) {
139 case TextureFormat::R32_G32_B32_A32:
140 return 32;
141 case TextureFormat::R16_G16_B16_A16:
142 return 16;
143 case TextureFormat::R32_G32_B32:
144 return component <= 2 ? 32 : 0;
145 case TextureFormat::R32_G32:
146 return component <= 1 ? 32 : 0;
147 case TextureFormat::R16_G16:
148 return component <= 1 ? 16 : 0;
149 case TextureFormat::R32:
150 return component == 0 ? 32 : 0;
151 case TextureFormat::R16:
152 return component == 0 ? 16 : 0;
153 case TextureFormat::R8:
154 return component == 0 ? 8 : 0;
155 case TextureFormat::R1:
156 return component == 0 ? 1 : 0;
157 case TextureFormat::A8R8G8B8:
158 return 8;
159 case TextureFormat::A2B10G10R10:
160 return (component == 3 || component == 2 || component == 1) ? 10 : 2;
161 case TextureFormat::A4B4G4R4:
162 return 4;
163 case TextureFormat::A5B5G5R1:
164 return (component == 0 || component == 1 || component == 2) ? 5 : 1;
165 case TextureFormat::A1B5G5R5:
166 return (component == 1 || component == 2 || component == 3) ? 5 : 1;
167 case TextureFormat::R32_B24G8:
168 if (component == 0) {
169 return 32;
170 }
171 if (component == 1) {
172 return 24;
173 }
174 if (component == 2) {
175 return 8;
176 }
177 return 0;
178 case TextureFormat::B5G6R5:
179 if (component == 0 || component == 2) {
180 return 5;
181 }
182 if (component == 1) {
183 return 6;
184 }
185 return 0;
186 case TextureFormat::B6G5R5:
187 if (component == 1 || component == 2) {
188 return 5;
189 }
190 if (component == 0) {
191 return 6;
192 }
193 return 0;
194 case TextureFormat::G8R24:
195 if (component == 0) {
196 return 8;
197 }
198 if (component == 1) {
199 return 24;
200 }
201 return 0;
202 case TextureFormat::G24R8:
203 if (component == 0) {
204 return 8;
205 }
206 if (component == 1) {
207 return 24;
208 }
209 return 0;
210 case TextureFormat::G8R8:
211 return (component == 0 || component == 1) ? 8 : 0;
212 case TextureFormat::G4R4:
213 return (component == 0 || component == 1) ? 4 : 0;
214 default:
215 UNIMPLEMENTED_MSG("texture format not implement={}", format);
216 return 0;
217 }
218}
219
220std::size_t GetImageComponentMask(TextureFormat format) {
221 constexpr u8 R = 0b0001;
222 constexpr u8 G = 0b0010;
223 constexpr u8 B = 0b0100;
224 constexpr u8 A = 0b1000;
225 switch (format) {
226 case TextureFormat::R32_G32_B32_A32:
227 case TextureFormat::R16_G16_B16_A16:
228 case TextureFormat::A8R8G8B8:
229 case TextureFormat::A2B10G10R10:
230 case TextureFormat::A4B4G4R4:
231 case TextureFormat::A5B5G5R1:
232 case TextureFormat::A1B5G5R5:
233 return std::size_t{R | G | B | A};
234 case TextureFormat::R32_G32_B32:
235 case TextureFormat::R32_B24G8:
236 case TextureFormat::B5G6R5:
237 case TextureFormat::B6G5R5:
238 return std::size_t{R | G | B};
239 case TextureFormat::R32_G32:
240 case TextureFormat::R16_G16:
241 case TextureFormat::G8R24:
242 case TextureFormat::G24R8:
243 case TextureFormat::G8R8:
244 case TextureFormat::G4R4:
245 return std::size_t{R | G};
246 case TextureFormat::R32:
247 case TextureFormat::R16:
248 case TextureFormat::R8:
249 case TextureFormat::R1:
250 return std::size_t{R};
251 default:
252 UNIMPLEMENTED_MSG("texture format not implement={}", format);
253 return std::size_t{R | G | B | A};
254 }
255}
256
23std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { 257std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
24 switch (image_type) { 258 switch (image_type) {
25 case Tegra::Shader::ImageType::Texture1D: 259 case Tegra::Shader::ImageType::Texture1D:
@@ -37,6 +271,39 @@ std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
37} 271}
38} // Anonymous namespace 272} // Anonymous namespace
39 273
274std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
275 Node original_value) {
276 switch (component_type) {
277 case ComponentType::SNORM: {
278 // range [-1.0, 1.0]
279 auto cnv_value = Operation(OperationCode::FMul, original_value,
280 Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f));
281 cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value));
282 return {BitfieldExtract(std::move(cnv_value), 0, component_size), true};
283 }
284 case ComponentType::SINT:
285 case ComponentType::UNORM: {
286 bool is_signed = component_type == ComponentType::SINT;
287 // range [0.0, 1.0]
288 auto cnv_value = Operation(OperationCode::FMul, original_value,
289 Immediate(static_cast<float>(1 << component_size) - 1.f));
290 return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)),
291 is_signed};
292 }
293 case ComponentType::UINT: // range [0, (1 << component_size) - 1]
294 return {std::move(original_value), false};
295 case ComponentType::FLOAT:
296 if (component_size == 16) {
297 return {Operation(OperationCode::HCastFloat, original_value), true};
298 } else {
299 return {std::move(original_value), true};
300 }
301 default:
302 UNIMPLEMENTED_MSG("Unimplement component type={}", component_type);
303 return {std::move(original_value), true};
304 }
305}
306
40u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { 307u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
41 const Instruction instr = {program_code[pc]}; 308 const Instruction instr = {program_code[pc]};
42 const auto opcode = OpCode::Decode(instr); 309 const auto opcode = OpCode::Decode(instr);
@@ -53,7 +320,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
53 320
54 switch (opcode->get().GetId()) { 321 switch (opcode->get().GetId()) {
55 case OpCode::Id::SULD: { 322 case OpCode::Id::SULD: {
56 UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
57 UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != 323 UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
58 Tegra::Shader::OutOfBoundsStore::Ignore); 324 Tegra::Shader::OutOfBoundsStore::Ignore);
59 325
@@ -62,17 +328,89 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
62 : GetBindlessImage(instr.gpr39, type)}; 328 : GetBindlessImage(instr.gpr39, type)};
63 image.MarkRead(); 329 image.MarkRead();
64 330
65 u32 indexer = 0; 331 if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) {
66 for (u32 element = 0; element < 4; ++element) { 332 u32 indexer = 0;
67 if (!instr.suldst.IsComponentEnabled(element)) { 333 for (u32 element = 0; element < 4; ++element) {
68 continue; 334 if (!instr.suldst.IsComponentEnabled(element)) {
335 continue;
336 }
337 MetaImage meta{image, {}, element};
338 Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
339 SetTemporary(bb, indexer++, std::move(value));
340 }
341 for (u32 i = 0; i < indexer; ++i) {
342 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
343 }
344 } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
345 UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 &&
346 instr.suldst.GetStoreDataLayout() != StoreType::Bits64);
347
348 auto descriptor = [this, instr] {
349 std::optional<Tegra::Engines::SamplerDescriptor> descriptor;
350 if (instr.suldst.is_immediate) {
351 descriptor =
352 registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
353 } else {
354 const Node image_register = GetRegister(instr.gpr39);
355 const auto [base_image, buffer, offset] = TrackCbuf(
356 image_register, global_code, static_cast<s64>(global_code.size()));
357 descriptor = registry.ObtainBindlessSampler(buffer, offset);
358 }
359 if (!descriptor) {
360 UNREACHABLE_MSG("Failed to obtain image descriptor");
361 }
362 return *descriptor;
363 }();
364
365 const auto comp_mask = GetImageComponentMask(descriptor.format);
366
367 switch (instr.suldst.GetStoreDataLayout()) {
368 case StoreType::Bits32:
369 case StoreType::Bits64: {
370 u32 indexer = 0;
371 u32 shifted_counter = 0;
372 Node value = Immediate(0);
373 for (u32 element = 0; element < 4; ++element) {
374 if (!IsComponentEnabled(comp_mask, element)) {
375 continue;
376 }
377 const auto component_type = GetComponentType(descriptor, element);
378 const auto component_size = GetComponentSize(descriptor.format, element);
379 MetaImage meta{image, {}, element};
380
381 auto [converted_value, is_signed] = GetComponentValue(
382 component_type, component_size,
383 Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)));
384
385 // shift element to correct position
386 const auto shifted = shifted_counter;
387 if (shifted > 0) {
388 converted_value =
389 SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
390 std::move(converted_value), Immediate(shifted));
391 }
392 shifted_counter += component_size;
393
394 // add value into result
395 value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value));
396
397 // once a full 32-bit word has been packed -> we save it into temp
398 if (shifted_counter >= 32) {
399 SetTemporary(bb, indexer++, std::move(value));
400 // reset counter and value to prepare packing the next 32-bit word
401 value = Immediate(0);
402 shifted_counter = 0;
403 }
404 }
405 for (u32 i = 0; i < indexer; ++i) {
406 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
407 }
408 break;
409 }
410 default:
411 UNREACHABLE();
412 break;
69 } 413 }
70 MetaImage meta{image, {}, element};
71 Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
72 SetTemporary(bb, indexer++, std::move(value));
73 }
74 for (u32 i = 0; i < indexer; ++i) {
75 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
76 } 414 }
77 break; 415 break;
78 } 416 }
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 4944e9d69..d4f95b18c 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -11,12 +11,17 @@
11 11
12namespace VideoCommon::Shader { 12namespace VideoCommon::Shader {
13 13
14using std::move;
14using Tegra::Shader::ConditionCode; 15using Tegra::Shader::ConditionCode;
15using Tegra::Shader::Instruction; 16using Tegra::Shader::Instruction;
17using Tegra::Shader::IpaInterpMode;
16using Tegra::Shader::OpCode; 18using Tegra::Shader::OpCode;
19using Tegra::Shader::PixelImap;
17using Tegra::Shader::Register; 20using Tegra::Shader::Register;
18using Tegra::Shader::SystemVariable; 21using Tegra::Shader::SystemVariable;
19 22
23using Index = Tegra::Shader::Attribute::Index;
24
20u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { 25u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
21 const Instruction instr = {program_code[pc]}; 26 const Instruction instr = {program_code[pc]};
22 const auto opcode = OpCode::Decode(instr); 27 const auto opcode = OpCode::Decode(instr);
@@ -66,18 +71,24 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
66 bb.push_back(Operation(OperationCode::Discard)); 71 bb.push_back(Operation(OperationCode::Discard));
67 break; 72 break;
68 } 73 }
69 case OpCode::Id::MOV_SYS: { 74 case OpCode::Id::S2R: {
70 const Node value = [this, instr] { 75 const Node value = [this, instr] {
71 switch (instr.sys20) { 76 switch (instr.sys20) {
72 case SystemVariable::LaneId: 77 case SystemVariable::LaneId:
73 LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete"); 78 LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete");
74 return Immediate(0U); 79 return Immediate(0U);
75 case SystemVariable::InvocationId: 80 case SystemVariable::InvocationId:
76 return Operation(OperationCode::InvocationId); 81 return Operation(OperationCode::InvocationId);
77 case SystemVariable::Ydirection: 82 case SystemVariable::Ydirection:
78 return Operation(OperationCode::YNegate); 83 return Operation(OperationCode::YNegate);
79 case SystemVariable::InvocationInfo: 84 case SystemVariable::InvocationInfo:
80 LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); 85 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
86 return Immediate(0U);
87 case SystemVariable::WscaleFactorXY:
88 UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
89 return Immediate(0U);
90 case SystemVariable::WscaleFactorZ:
91 UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
81 return Immediate(0U); 92 return Immediate(0U);
82 case SystemVariable::Tid: { 93 case SystemVariable::Tid: {
83 Node value = Immediate(0); 94 Node value = Immediate(0);
@@ -213,27 +224,28 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
213 } 224 }
214 case OpCode::Id::IPA: { 225 case OpCode::Id::IPA: {
215 const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; 226 const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
216
217 const auto attribute = instr.attribute.fmt28; 227 const auto attribute = instr.attribute.fmt28;
218 const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(), 228 const Index index = attribute.index;
219 instr.ipa.sample_mode.Value()};
220 229
221 Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) 230 Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
222 : GetInputAttribute(attribute.index, attribute.element); 231 : GetInputAttribute(index, attribute.element);
223 const Tegra::Shader::Attribute::Index index = attribute.index.Value(); 232
224 const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 && 233 // Code taken from Ryujinx.
225 index <= Tegra::Shader::Attribute::Index::Attribute_31; 234 if (index >= Index::Attribute_0 && index <= Index::Attribute_31) {
226 if (is_generic || is_physical) { 235 const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0);
227 // TODO(Blinkhawk): There are cases where a perspective attribute use PASS. 236 if (header.ps.GetPixelImap(location) == PixelImap::Perspective) {
228 // In theory by setting them as perspective, OpenGL does the perspective correction. 237 Node position_w = GetInputAttribute(Index::Position, 3);
229 // A way must figured to reverse the last step of it. 238 value = Operation(OperationCode::FMul, move(value), move(position_w));
230 if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
231 value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
232 } 239 }
233 } 240 }
234 value = GetSaturatedFloat(value, instr.ipa.saturate);
235 241
236 SetRegister(bb, instr.gpr0, value); 242 if (instr.ipa.interp_mode == IpaInterpMode::Multiply) {
243 value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20));
244 }
245
246 value = GetSaturatedFloat(move(value), instr.ipa.saturate);
247
248 SetRegister(bb, instr.gpr0, move(value));
237 break; 249 break;
238 } 250 }
239 case OpCode::Id::OUT_R: { 251 case OpCode::Id::OUT_R: {
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index baf7188d2..8852c8a1b 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -359,6 +359,9 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const {
359 switch (cc) { 359 switch (cc) {
360 case Tegra::Shader::ConditionCode::NEU: 360 case Tegra::Shader::ConditionCode::NEU:
361 return GetInternalFlag(InternalFlag::Zero, true); 361 return GetInternalFlag(InternalFlag::Zero, true);
362 case Tegra::Shader::ConditionCode::FCSM_TR:
363 UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented");
364 return MakeNode<PredicateNode>(Pred::NeverExecute, false);
362 default: 365 default:
363 UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc)); 366 UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
364 return MakeNode<PredicateNode>(Pred::NeverExecute, false); 367 return MakeNode<PredicateNode>(Pred::NeverExecute, false);
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 80fc9b82c..ca6c976c9 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -312,6 +312,10 @@ private:
312 /// Conditionally saturates a half float pair 312 /// Conditionally saturates a half float pair
313 Node GetSaturatedHalfFloat(Node value, bool saturate = true); 313 Node GetSaturatedHalfFloat(Node value, bool saturate = true);
314 314
315 /// Get image component value by type and size
316 std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type,
317 u32 component_size, Node original_value);
318
315 /// Returns a predicate comparing two floats 319 /// Returns a predicate comparing two floats
316 Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); 320 Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
317 /// Returns a predicate comparing two integers 321 /// Returns a predicate comparing two integers
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index ae8817465..e0acd44d3 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -504,103 +504,6 @@ static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) {
504 return GetFormatBpp(pixel_format) / CHAR_BIT; 504 return GetFormatBpp(pixel_format) / CHAR_BIT;
505} 505}
506 506
507enum class SurfaceCompression {
508 None, // Not compressed
509 Compressed, // Texture is compressed
510 Converted, // Texture is converted before upload or after download
511 Rearranged, // Texture is swizzled before upload or after download
512};
513
514constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table = {{
515 SurfaceCompression::None, // ABGR8U
516 SurfaceCompression::None, // ABGR8S
517 SurfaceCompression::None, // ABGR8UI
518 SurfaceCompression::None, // B5G6R5U
519 SurfaceCompression::None, // A2B10G10R10U
520 SurfaceCompression::None, // A1B5G5R5U
521 SurfaceCompression::None, // R8U
522 SurfaceCompression::None, // R8UI
523 SurfaceCompression::None, // RGBA16F
524 SurfaceCompression::None, // RGBA16U
525 SurfaceCompression::None, // RGBA16S
526 SurfaceCompression::None, // RGBA16UI
527 SurfaceCompression::None, // R11FG11FB10F
528 SurfaceCompression::None, // RGBA32UI
529 SurfaceCompression::Compressed, // DXT1
530 SurfaceCompression::Compressed, // DXT23
531 SurfaceCompression::Compressed, // DXT45
532 SurfaceCompression::Compressed, // DXN1
533 SurfaceCompression::Compressed, // DXN2UNORM
534 SurfaceCompression::Compressed, // DXN2SNORM
535 SurfaceCompression::Compressed, // BC7U
536 SurfaceCompression::Compressed, // BC6H_UF16
537 SurfaceCompression::Compressed, // BC6H_SF16
538 SurfaceCompression::Converted, // ASTC_2D_4X4
539 SurfaceCompression::None, // BGRA8
540 SurfaceCompression::None, // RGBA32F
541 SurfaceCompression::None, // RG32F
542 SurfaceCompression::None, // R32F
543 SurfaceCompression::None, // R16F
544 SurfaceCompression::None, // R16U
545 SurfaceCompression::None, // R16S
546 SurfaceCompression::None, // R16UI
547 SurfaceCompression::None, // R16I
548 SurfaceCompression::None, // RG16
549 SurfaceCompression::None, // RG16F
550 SurfaceCompression::None, // RG16UI
551 SurfaceCompression::None, // RG16I
552 SurfaceCompression::None, // RG16S
553 SurfaceCompression::None, // RGB32F
554 SurfaceCompression::None, // RGBA8_SRGB
555 SurfaceCompression::None, // RG8U
556 SurfaceCompression::None, // RG8S
557 SurfaceCompression::None, // RG32UI
558 SurfaceCompression::None, // RGBX16F
559 SurfaceCompression::None, // R32UI
560 SurfaceCompression::None, // R32I
561 SurfaceCompression::Converted, // ASTC_2D_8X8
562 SurfaceCompression::Converted, // ASTC_2D_8X5
563 SurfaceCompression::Converted, // ASTC_2D_5X4
564 SurfaceCompression::None, // BGRA8_SRGB
565 SurfaceCompression::Compressed, // DXT1_SRGB
566 SurfaceCompression::Compressed, // DXT23_SRGB
567 SurfaceCompression::Compressed, // DXT45_SRGB
568 SurfaceCompression::Compressed, // BC7U_SRGB
569 SurfaceCompression::None, // R4G4B4A4U
570 SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB
571 SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB
572 SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB
573 SurfaceCompression::Converted, // ASTC_2D_5X4_SRGB
574 SurfaceCompression::Converted, // ASTC_2D_5X5
575 SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB
576 SurfaceCompression::Converted, // ASTC_2D_10X8
577 SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB
578 SurfaceCompression::Converted, // ASTC_2D_6X6
579 SurfaceCompression::Converted, // ASTC_2D_6X6_SRGB
580 SurfaceCompression::Converted, // ASTC_2D_10X10
581 SurfaceCompression::Converted, // ASTC_2D_10X10_SRGB
582 SurfaceCompression::Converted, // ASTC_2D_12X12
583 SurfaceCompression::Converted, // ASTC_2D_12X12_SRGB
584 SurfaceCompression::Converted, // ASTC_2D_8X6
585 SurfaceCompression::Converted, // ASTC_2D_8X6_SRGB
586 SurfaceCompression::Converted, // ASTC_2D_6X5
587 SurfaceCompression::Converted, // ASTC_2D_6X5_SRGB
588 SurfaceCompression::None, // E5B9G9R9F
589 SurfaceCompression::None, // Z32F
590 SurfaceCompression::None, // Z16
591 SurfaceCompression::None, // Z24S8
592 SurfaceCompression::Rearranged, // S8Z24
593 SurfaceCompression::None, // Z32FS8
594}};
595
596constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) {
597 if (format == PixelFormat::Invalid) {
598 return SurfaceCompression::None;
599 }
600 DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_type_table.size());
601 return compression_type_table[static_cast<std::size_t>(format)];
602}
603
604SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); 507SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type);
605 508
606bool SurfaceTargetIsLayered(SurfaceTarget target); 509bool SurfaceTargetIsLayered(SurfaceTarget target);
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 002df414f..7af0e792c 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -18,15 +18,20 @@ MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192,
18 18
19using Tegra::Texture::ConvertFromGuestToHost; 19using Tegra::Texture::ConvertFromGuestToHost;
20using VideoCore::MortonSwizzleMode; 20using VideoCore::MortonSwizzleMode;
21using VideoCore::Surface::SurfaceCompression; 21using VideoCore::Surface::IsPixelFormatASTC;
22using VideoCore::Surface::PixelFormat;
22 23
23StagingCache::StagingCache() = default; 24StagingCache::StagingCache() = default;
24 25
25StagingCache::~StagingCache() = default; 26StagingCache::~StagingCache() = default;
26 27
27SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) 28SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
28 : params{params}, host_memory_size{params.GetHostSizeInBytes()}, gpu_addr{gpu_addr}, 29 bool is_astc_supported)
29 mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels) { 30 : params{params}, gpu_addr{gpu_addr}, mipmap_sizes(params.num_levels),
31 mipmap_offsets(params.num_levels) {
32 is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported;
33 host_memory_size = params.GetHostSizeInBytes(is_converted);
34
30 std::size_t offset = 0; 35 std::size_t offset = 0;
31 for (u32 level = 0; level < params.num_levels; ++level) { 36 for (u32 level = 0; level < params.num_levels; ++level) {
32 const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; 37 const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};
@@ -164,7 +169,7 @@ void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const Surf
164 169
165 std::size_t guest_offset{mipmap_offsets[level]}; 170 std::size_t guest_offset{mipmap_offsets[level]};
166 if (params.is_layered) { 171 if (params.is_layered) {
167 std::size_t host_offset{0}; 172 std::size_t host_offset = 0;
168 const std::size_t guest_stride = layer_size; 173 const std::size_t guest_stride = layer_size;
169 const std::size_t host_stride = params.GetHostLayerSize(level); 174 const std::size_t host_stride = params.GetHostLayerSize(level);
170 for (u32 layer = 0; layer < params.depth; ++layer) { 175 for (u32 layer = 0; layer < params.depth; ++layer) {
@@ -185,28 +190,17 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
185 MICROPROFILE_SCOPE(GPU_Load_Texture); 190 MICROPROFILE_SCOPE(GPU_Load_Texture);
186 auto& staging_buffer = staging_cache.GetBuffer(0); 191 auto& staging_buffer = staging_cache.GetBuffer(0);
187 u8* host_ptr; 192 u8* host_ptr;
188 is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size); 193 // Use an extra temporal buffer
189 194 auto& tmp_buffer = staging_cache.GetBuffer(1);
190 // Handle continuouty 195 tmp_buffer.resize(guest_memory_size);
191 if (is_continuous) { 196 host_ptr = tmp_buffer.data();
192 // Use physical memory directly 197 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
193 host_ptr = memory_manager.GetPointer(gpu_addr);
194 if (!host_ptr) {
195 return;
196 }
197 } else {
198 // Use an extra temporal buffer
199 auto& tmp_buffer = staging_cache.GetBuffer(1);
200 tmp_buffer.resize(guest_memory_size);
201 host_ptr = tmp_buffer.data();
202 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
203 }
204 198
205 if (params.is_tiled) { 199 if (params.is_tiled) {
206 ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", 200 ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
207 params.block_width, static_cast<u32>(params.target)); 201 params.block_width, static_cast<u32>(params.target));
208 for (u32 level = 0; level < params.num_levels; ++level) { 202 for (u32 level = 0; level < params.num_levels; ++level) {
209 const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; 203 const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
210 SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, 204 SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
211 staging_buffer.data() + host_offset, level); 205 staging_buffer.data() + host_offset, level);
212 } 206 }
@@ -219,7 +213,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
219 const u32 height{(params.height + block_height - 1) / block_height}; 213 const u32 height{(params.height + block_height - 1) / block_height};
220 const u32 copy_size{width * bpp}; 214 const u32 copy_size{width * bpp};
221 if (params.pitch == copy_size) { 215 if (params.pitch == copy_size) {
222 std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes()); 216 std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false));
223 } else { 217 } else {
224 const u8* start{host_ptr}; 218 const u8* start{host_ptr};
225 u8* write_to{staging_buffer.data()}; 219 u8* write_to{staging_buffer.data()};
@@ -231,19 +225,15 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
231 } 225 }
232 } 226 }
233 227
234 auto compression_type = params.GetCompressionType(); 228 if (!is_converted && params.pixel_format != PixelFormat::S8Z24) {
235 if (compression_type == SurfaceCompression::None ||
236 compression_type == SurfaceCompression::Compressed)
237 return; 229 return;
230 }
238 231
239 for (u32 level_up = params.num_levels; level_up > 0; --level_up) { 232 for (u32 level = params.num_levels; level--;) {
240 const u32 level = level_up - 1; 233 const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)};
241 const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level)}; 234 const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)};
242 const std::size_t out_host_offset = compression_type == SurfaceCompression::Rearranged 235 u8* const in_buffer = staging_buffer.data() + in_host_offset;
243 ? in_host_offset 236 u8* const out_buffer = staging_buffer.data() + out_host_offset;
244 : params.GetConvertedMipmapOffset(level);
245 u8* in_buffer = staging_buffer.data() + in_host_offset;
246 u8* out_buffer = staging_buffer.data() + out_host_offset;
247 ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format, 237 ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format,
248 params.GetMipWidth(level), params.GetMipHeight(level), 238 params.GetMipWidth(level), params.GetMipHeight(level),
249 params.GetMipDepth(level), true, true); 239 params.GetMipDepth(level), true, true);
@@ -256,24 +246,15 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
256 auto& staging_buffer = staging_cache.GetBuffer(0); 246 auto& staging_buffer = staging_cache.GetBuffer(0);
257 u8* host_ptr; 247 u8* host_ptr;
258 248
259 // Handle continuouty 249 // Use an extra temporal buffer
260 if (is_continuous) { 250 auto& tmp_buffer = staging_cache.GetBuffer(1);
261 // Use physical memory directly 251 tmp_buffer.resize(guest_memory_size);
262 host_ptr = memory_manager.GetPointer(gpu_addr); 252 host_ptr = tmp_buffer.data();
263 if (!host_ptr) {
264 return;
265 }
266 } else {
267 // Use an extra temporal buffer
268 auto& tmp_buffer = staging_cache.GetBuffer(1);
269 tmp_buffer.resize(guest_memory_size);
270 host_ptr = tmp_buffer.data();
271 }
272 253
273 if (params.is_tiled) { 254 if (params.is_tiled) {
274 ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); 255 ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
275 for (u32 level = 0; level < params.num_levels; ++level) { 256 for (u32 level = 0; level < params.num_levels; ++level) {
276 const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; 257 const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
277 SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, 258 SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
278 staging_buffer.data() + host_offset, level); 259 staging_buffer.data() + host_offset, level);
279 } 260 }
@@ -299,9 +280,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
299 } 280 }
300 } 281 }
301 } 282 }
302 if (!is_continuous) { 283 memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
303 memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
304 }
305} 284}
306 285
307} // namespace VideoCommon 286} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 5f79bb0aa..a39a8661b 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -68,8 +68,8 @@ public:
68 return gpu_addr; 68 return gpu_addr;
69 } 69 }
70 70
71 bool Overlaps(const CacheAddr start, const CacheAddr end) const { 71 bool Overlaps(const VAddr start, const VAddr end) const {
72 return (cache_addr < end) && (cache_addr_end > start); 72 return (cpu_addr < end) && (cpu_addr_end > start);
73 } 73 }
74 74
75 bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { 75 bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) {
@@ -86,21 +86,13 @@ public:
86 return cpu_addr; 86 return cpu_addr;
87 } 87 }
88 88
89 void SetCpuAddr(const VAddr new_addr) { 89 VAddr GetCpuAddrEnd() const {
90 cpu_addr = new_addr; 90 return cpu_addr_end;
91 }
92
93 CacheAddr GetCacheAddr() const {
94 return cache_addr;
95 } 91 }
96 92
97 CacheAddr GetCacheAddrEnd() const { 93 void SetCpuAddr(const VAddr new_addr) {
98 return cache_addr_end; 94 cpu_addr = new_addr;
99 } 95 cpu_addr_end = new_addr + guest_memory_size;
100
101 void SetCacheAddr(const CacheAddr new_addr) {
102 cache_addr = new_addr;
103 cache_addr_end = new_addr + guest_memory_size;
104 } 96 }
105 97
106 const SurfaceParams& GetSurfaceParams() const { 98 const SurfaceParams& GetSurfaceParams() const {
@@ -119,18 +111,14 @@ public:
119 return mipmap_sizes[level]; 111 return mipmap_sizes[level];
120 } 112 }
121 113
122 void MarkAsContinuous(const bool is_continuous) {
123 this->is_continuous = is_continuous;
124 }
125
126 bool IsContinuous() const {
127 return is_continuous;
128 }
129
130 bool IsLinear() const { 114 bool IsLinear() const {
131 return !params.is_tiled; 115 return !params.is_tiled;
132 } 116 }
133 117
118 bool IsConverted() const {
119 return is_converted;
120 }
121
134 bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { 122 bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const {
135 return params.pixel_format == pixel_format; 123 return params.pixel_format == pixel_format;
136 } 124 }
@@ -160,7 +148,8 @@ public:
160 } 148 }
161 149
162protected: 150protected:
163 explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params); 151 explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
152 bool is_astc_supported);
164 ~SurfaceBaseImpl() = default; 153 ~SurfaceBaseImpl() = default;
165 154
166 virtual void DecorateSurfaceName() = 0; 155 virtual void DecorateSurfaceName() = 0;
@@ -168,12 +157,11 @@ protected:
168 const SurfaceParams params; 157 const SurfaceParams params;
169 std::size_t layer_size; 158 std::size_t layer_size;
170 std::size_t guest_memory_size; 159 std::size_t guest_memory_size;
171 const std::size_t host_memory_size; 160 std::size_t host_memory_size;
172 GPUVAddr gpu_addr{}; 161 GPUVAddr gpu_addr{};
173 CacheAddr cache_addr{};
174 CacheAddr cache_addr_end{};
175 VAddr cpu_addr{}; 162 VAddr cpu_addr{};
176 bool is_continuous{}; 163 VAddr cpu_addr_end{};
164 bool is_converted{};
177 165
178 std::vector<std::size_t> mipmap_sizes; 166 std::vector<std::size_t> mipmap_sizes;
179 std::vector<std::size_t> mipmap_offsets; 167 std::vector<std::size_t> mipmap_offsets;
@@ -288,8 +276,9 @@ public:
288 } 276 }
289 277
290protected: 278protected:
291 explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params) 279 explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params,
292 : SurfaceBaseImpl(gpu_addr, params) {} 280 bool is_astc_supported)
281 : SurfaceBaseImpl(gpu_addr, params, is_astc_supported) {}
293 282
294 ~SurfaceBase() = default; 283 ~SurfaceBase() = default;
295 284
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 9931c5ef7..6f3ef45be 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -113,10 +113,8 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
113 params.height = tic.Height(); 113 params.height = tic.Height();
114 params.depth = tic.Depth(); 114 params.depth = tic.Depth();
115 params.pitch = params.is_tiled ? 0 : tic.Pitch(); 115 params.pitch = params.is_tiled ? 0 : tic.Pitch();
116 if (params.target == SurfaceTarget::Texture2D && params.depth > 1) { 116 if (params.target == SurfaceTarget::TextureCubemap ||
117 params.depth = 1; 117 params.target == SurfaceTarget::TextureCubeArray) {
118 } else if (params.target == SurfaceTarget::TextureCubemap ||
119 params.target == SurfaceTarget::TextureCubeArray) {
120 params.depth *= 6; 118 params.depth *= 6;
121 } 119 }
122 params.num_levels = tic.max_mip_level + 1; 120 params.num_levels = tic.max_mip_level + 1;
@@ -309,28 +307,26 @@ std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
309 return offset; 307 return offset;
310} 308}
311 309
312std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { 310std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const {
313 std::size_t offset = 0;
314 for (u32 i = 0; i < level; i++) {
315 offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
316 }
317 return offset;
318}
319
320std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const {
321 std::size_t offset = 0; 311 std::size_t offset = 0;
322 for (u32 i = 0; i < level; i++) { 312 if (is_converted) {
323 offset += GetConvertedMipmapSize(i); 313 for (u32 i = 0; i < level; ++i) {
314 offset += GetConvertedMipmapSize(i) * GetNumLayers();
315 }
316 } else {
317 for (u32 i = 0; i < level; ++i) {
318 offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
319 }
324 } 320 }
325 return offset; 321 return offset;
326} 322}
327 323
328std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { 324std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const {
329 constexpr std::size_t rgba8_bpp = 4ULL; 325 constexpr std::size_t rgba8_bpp = 4ULL;
330 const std::size_t width_t = GetMipWidth(level); 326 const std::size_t mip_width = GetMipWidth(level);
331 const std::size_t height_t = GetMipHeight(level); 327 const std::size_t mip_height = GetMipHeight(level);
332 const std::size_t depth_t = is_layered ? depth : GetMipDepth(level); 328 const std::size_t mip_depth = is_layered ? 1 : GetMipDepth(level);
333 return width_t * height_t * depth_t * rgba8_bpp; 329 return mip_width * mip_height * mip_depth * rgba8_bpp;
334} 330}
335 331
336std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { 332std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const {
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index 995cc3818..24957df8d 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -20,8 +20,6 @@ namespace VideoCommon {
20 20
21class FormatLookupTable; 21class FormatLookupTable;
22 22
23using VideoCore::Surface::SurfaceCompression;
24
25class SurfaceParams { 23class SurfaceParams {
26public: 24public:
27 /// Creates SurfaceCachedParams from a texture configuration. 25 /// Creates SurfaceCachedParams from a texture configuration.
@@ -67,16 +65,14 @@ public:
67 return GetInnerMemorySize(false, false, false); 65 return GetInnerMemorySize(false, false, false);
68 } 66 }
69 67
70 std::size_t GetHostSizeInBytes() const { 68 std::size_t GetHostSizeInBytes(bool is_converted) const {
71 std::size_t host_size_in_bytes; 69 if (!is_converted) {
72 if (GetCompressionType() == SurfaceCompression::Converted) { 70 return GetInnerMemorySize(true, false, false);
73 // ASTC is uncompressed in software, in emulated as RGBA8 71 }
74 host_size_in_bytes = 0; 72 // ASTC is uncompressed in software, in emulated as RGBA8
75 for (u32 level = 0; level < num_levels; ++level) { 73 std::size_t host_size_in_bytes = 0;
76 host_size_in_bytes += GetConvertedMipmapSize(level); 74 for (u32 level = 0; level < num_levels; ++level) {
77 } 75 host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers();
78 } else {
79 host_size_in_bytes = GetInnerMemorySize(true, false, false);
80 } 76 }
81 return host_size_in_bytes; 77 return host_size_in_bytes;
82 } 78 }
@@ -107,9 +103,8 @@ public:
107 u32 GetMipBlockDepth(u32 level) const; 103 u32 GetMipBlockDepth(u32 level) const;
108 104
109 /// Returns the best possible row/pitch alignment for the surface. 105 /// Returns the best possible row/pitch alignment for the surface.
110 u32 GetRowAlignment(u32 level) const { 106 u32 GetRowAlignment(u32 level, bool is_converted) const {
111 const u32 bpp = 107 const u32 bpp = is_converted ? 4 : GetBytesPerPixel();
112 GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel();
113 return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); 108 return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp);
114 } 109 }
115 110
@@ -117,11 +112,7 @@ public:
117 std::size_t GetGuestMipmapLevelOffset(u32 level) const; 112 std::size_t GetGuestMipmapLevelOffset(u32 level) const;
118 113
119 /// Returns the offset in bytes in host memory (linear) of a given mipmap level. 114 /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
120 std::size_t GetHostMipmapLevelOffset(u32 level) const; 115 std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const;
121
122 /// Returns the offset in bytes in host memory (linear) of a given mipmap level
123 /// for a texture that is converted in host gpu.
124 std::size_t GetConvertedMipmapOffset(u32 level) const;
125 116
126 /// Returns the size in bytes in guest memory of a given mipmap level. 117 /// Returns the size in bytes in guest memory of a given mipmap level.
127 std::size_t GetGuestMipmapSize(u32 level) const { 118 std::size_t GetGuestMipmapSize(u32 level) const {
@@ -196,11 +187,6 @@ public:
196 pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; 187 pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
197 } 188 }
198 189
199 /// Returns how the compression should be handled for this texture.
200 SurfaceCompression GetCompressionType() const {
201 return VideoCore::Surface::GetFormatCompressionType(pixel_format);
202 }
203
204 /// Returns is the surface is a TextureBuffer type of surface. 190 /// Returns is the surface is a TextureBuffer type of surface.
205 bool IsBuffer() const { 191 bool IsBuffer() const {
206 return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; 192 return target == VideoCore::Surface::SurfaceTarget::TextureBuffer;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 6cdbe63d0..88fe3e25f 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -52,11 +52,9 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
52 52
53template <typename TSurface, typename TView> 53template <typename TSurface, typename TView>
54class TextureCache { 54class TextureCache {
55 using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>;
56 using IntervalType = typename IntervalMap::interval_type;
57 55
58public: 56public:
59 void InvalidateRegion(CacheAddr addr, std::size_t size) { 57 void InvalidateRegion(VAddr addr, std::size_t size) {
60 std::lock_guard lock{mutex}; 58 std::lock_guard lock{mutex};
61 59
62 for (const auto& surface : GetSurfacesInRegion(addr, size)) { 60 for (const auto& surface : GetSurfacesInRegion(addr, size)) {
@@ -76,7 +74,7 @@ public:
76 guard_samplers = new_guard; 74 guard_samplers = new_guard;
77 } 75 }
78 76
79 void FlushRegion(CacheAddr addr, std::size_t size) { 77 void FlushRegion(VAddr addr, std::size_t size) {
80 std::lock_guard lock{mutex}; 78 std::lock_guard lock{mutex};
81 79
82 auto surfaces = GetSurfacesInRegion(addr, size); 80 auto surfaces = GetSurfacesInRegion(addr, size);
@@ -99,9 +97,9 @@ public:
99 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 97 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
100 } 98 }
101 99
102 const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; 100 const std::optional<VAddr> cpu_addr =
103 const auto cache_addr{ToCacheAddr(host_ptr)}; 101 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
104 if (!cache_addr) { 102 if (!cpu_addr) {
105 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 103 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
106 } 104 }
107 105
@@ -110,7 +108,7 @@ public:
110 } 108 }
111 109
112 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; 110 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
113 const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); 111 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
114 if (guard_samplers) { 112 if (guard_samplers) {
115 sampled_textures.push_back(surface); 113 sampled_textures.push_back(surface);
116 } 114 }
@@ -124,13 +122,13 @@ public:
124 if (!gpu_addr) { 122 if (!gpu_addr) {
125 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 123 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
126 } 124 }
127 const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; 125 const std::optional<VAddr> cpu_addr =
128 const auto cache_addr{ToCacheAddr(host_ptr)}; 126 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
129 if (!cache_addr) { 127 if (!cpu_addr) {
130 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 128 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
131 } 129 }
132 const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; 130 const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
133 const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); 131 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false);
134 if (guard_samplers) { 132 if (guard_samplers) {
135 sampled_textures.push_back(surface); 133 sampled_textures.push_back(surface);
136 } 134 }
@@ -159,14 +157,14 @@ public:
159 SetEmptyDepthBuffer(); 157 SetEmptyDepthBuffer();
160 return {}; 158 return {};
161 } 159 }
162 const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; 160 const std::optional<VAddr> cpu_addr =
163 const auto cache_addr{ToCacheAddr(host_ptr)}; 161 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
164 if (!cache_addr) { 162 if (!cpu_addr) {
165 SetEmptyDepthBuffer(); 163 SetEmptyDepthBuffer();
166 return {}; 164 return {};
167 } 165 }
168 const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; 166 const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
169 auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true); 167 auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
170 if (depth_buffer.target) 168 if (depth_buffer.target)
171 depth_buffer.target->MarkAsRenderTarget(false, NO_RT); 169 depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
172 depth_buffer.target = surface_view.first; 170 depth_buffer.target = surface_view.first;
@@ -199,15 +197,15 @@ public:
199 return {}; 197 return {};
200 } 198 }
201 199
202 const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; 200 const std::optional<VAddr> cpu_addr =
203 const auto cache_addr{ToCacheAddr(host_ptr)}; 201 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
204 if (!cache_addr) { 202 if (!cpu_addr) {
205 SetEmptyColorBuffer(index); 203 SetEmptyColorBuffer(index);
206 return {}; 204 return {};
207 } 205 }
208 206
209 auto surface_view = 207 auto surface_view =
210 GetSurface(gpu_addr, cache_addr, SurfaceParams::CreateForFramebuffer(system, index), 208 GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
211 preserve_contents, true); 209 preserve_contents, true);
212 if (render_targets[index].target) 210 if (render_targets[index].target)
213 render_targets[index].target->MarkAsRenderTarget(false, NO_RT); 211 render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
@@ -257,27 +255,26 @@ public:
257 const GPUVAddr src_gpu_addr = src_config.Address(); 255 const GPUVAddr src_gpu_addr = src_config.Address();
258 const GPUVAddr dst_gpu_addr = dst_config.Address(); 256 const GPUVAddr dst_gpu_addr = dst_config.Address();
259 DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); 257 DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
260 const auto dst_host_ptr{system.GPU().MemoryManager().GetPointer(dst_gpu_addr)}; 258 const std::optional<VAddr> dst_cpu_addr =
261 const auto dst_cache_addr{ToCacheAddr(dst_host_ptr)}; 259 system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr);
262 const auto src_host_ptr{system.GPU().MemoryManager().GetPointer(src_gpu_addr)}; 260 const std::optional<VAddr> src_cpu_addr =
263 const auto src_cache_addr{ToCacheAddr(src_host_ptr)}; 261 system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
264 std::pair<TSurface, TView> dst_surface = 262 std::pair<TSurface, TView> dst_surface =
265 GetSurface(dst_gpu_addr, dst_cache_addr, dst_params, true, false); 263 GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
266 std::pair<TSurface, TView> src_surface = 264 std::pair<TSurface, TView> src_surface =
267 GetSurface(src_gpu_addr, src_cache_addr, src_params, true, false); 265 GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);
268 ImageBlit(src_surface.second, dst_surface.second, copy_config); 266 ImageBlit(src_surface.second, dst_surface.second, copy_config);
269 dst_surface.first->MarkAsModified(true, Tick()); 267 dst_surface.first->MarkAsModified(true, Tick());
270 } 268 }
271 269
272 TSurface TryFindFramebufferSurface(const u8* host_ptr) { 270 TSurface TryFindFramebufferSurface(VAddr addr) {
273 const CacheAddr cache_addr = ToCacheAddr(host_ptr); 271 if (!addr) {
274 if (!cache_addr) {
275 return nullptr; 272 return nullptr;
276 } 273 }
277 const CacheAddr page = cache_addr >> registry_page_bits; 274 const VAddr page = addr >> registry_page_bits;
278 std::vector<TSurface>& list = registry[page]; 275 std::vector<TSurface>& list = registry[page];
279 for (auto& surface : list) { 276 for (auto& surface : list) {
280 if (surface->GetCacheAddr() == cache_addr) { 277 if (surface->GetCpuAddr() == addr) {
281 return surface; 278 return surface;
282 } 279 }
283 } 280 }
@@ -289,8 +286,9 @@ public:
289 } 286 }
290 287
291protected: 288protected:
292 TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) 289 explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
293 : system{system}, rasterizer{rasterizer} { 290 bool is_astc_supported)
291 : system{system}, is_astc_supported{is_astc_supported}, rasterizer{rasterizer} {
294 for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { 292 for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
295 SetEmptyColorBuffer(i); 293 SetEmptyColorBuffer(i);
296 } 294 }
@@ -337,18 +335,14 @@ protected:
337 335
338 void Register(TSurface surface) { 336 void Register(TSurface surface) {
339 const GPUVAddr gpu_addr = surface->GetGpuAddr(); 337 const GPUVAddr gpu_addr = surface->GetGpuAddr();
340 const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
341 const std::size_t size = surface->GetSizeInBytes(); 338 const std::size_t size = surface->GetSizeInBytes();
342 const std::optional<VAddr> cpu_addr = 339 const std::optional<VAddr> cpu_addr =
343 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); 340 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
344 if (!cache_ptr || !cpu_addr) { 341 if (!cpu_addr) {
345 LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", 342 LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
346 gpu_addr); 343 gpu_addr);
347 return; 344 return;
348 } 345 }
349 const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size);
350 surface->MarkAsContinuous(continuous);
351 surface->SetCacheAddr(cache_ptr);
352 surface->SetCpuAddr(*cpu_addr); 346 surface->SetCpuAddr(*cpu_addr);
353 RegisterInnerCache(surface); 347 RegisterInnerCache(surface);
354 surface->MarkAsRegistered(true); 348 surface->MarkAsRegistered(true);
@@ -381,6 +375,7 @@ protected:
381 } 375 }
382 376
383 Core::System& system; 377 Core::System& system;
378 const bool is_astc_supported;
384 379
385private: 380private:
386 enum class RecycleStrategy : u32 { 381 enum class RecycleStrategy : u32 {
@@ -632,7 +627,7 @@ private:
632 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, 627 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
633 const SurfaceParams& params, 628 const SurfaceParams& params,
634 const GPUVAddr gpu_addr, 629 const GPUVAddr gpu_addr,
635 const CacheAddr cache_addr, 630 const VAddr cpu_addr,
636 bool preserve_contents) { 631 bool preserve_contents) {
637 if (params.target == SurfaceTarget::Texture3D) { 632 if (params.target == SurfaceTarget::Texture3D) {
638 bool failed = false; 633 bool failed = false;
@@ -657,7 +652,7 @@ private:
657 failed = true; 652 failed = true;
658 break; 653 break;
659 } 654 }
660 const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr); 655 const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
661 const auto [x, y, z] = params.GetBlockOffsetXYZ(offset); 656 const auto [x, y, z] = params.GetBlockOffsetXYZ(offset);
662 modified |= surface->IsModified(); 657 modified |= surface->IsModified();
663 const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, 658 const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
@@ -677,7 +672,7 @@ private:
677 } else { 672 } else {
678 for (const auto& surface : overlaps) { 673 for (const auto& surface : overlaps) {
679 if (!surface->MatchTarget(params.target)) { 674 if (!surface->MatchTarget(params.target)) {
680 if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) { 675 if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
681 if (Settings::values.use_accurate_gpu_emulation) { 676 if (Settings::values.use_accurate_gpu_emulation) {
682 return std::nullopt; 677 return std::nullopt;
683 } 678 }
@@ -686,7 +681,7 @@ private:
686 } 681 }
687 return std::nullopt; 682 return std::nullopt;
688 } 683 }
689 if (surface->GetCacheAddr() != cache_addr) { 684 if (surface->GetCpuAddr() != cpu_addr) {
690 continue; 685 continue;
691 } 686 }
692 if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { 687 if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
@@ -720,13 +715,13 @@ private:
720 * left blank. 715 * left blank.
721 * @param is_render Whether or not the surface is a render target. 716 * @param is_render Whether or not the surface is a render target.
722 **/ 717 **/
723 std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr, 718 std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
724 const SurfaceParams& params, bool preserve_contents, 719 const SurfaceParams& params, bool preserve_contents,
725 bool is_render) { 720 bool is_render) {
726 // Step 1 721 // Step 1
727 // Check Level 1 Cache for a fast structural match. If candidate surface 722 // Check Level 1 Cache for a fast structural match. If candidate surface
728 // matches at certain level we are pretty much done. 723 // matches at certain level we are pretty much done.
729 if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { 724 if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {
730 TSurface& current_surface = iter->second; 725 TSurface& current_surface = iter->second;
731 const auto topological_result = current_surface->MatchesTopology(params); 726 const auto topological_result = current_surface->MatchesTopology(params);
732 if (topological_result != MatchTopologyResult::FullMatch) { 727 if (topological_result != MatchTopologyResult::FullMatch) {
@@ -753,7 +748,7 @@ private:
753 // Step 2 748 // Step 2
754 // Obtain all possible overlaps in the memory region 749 // Obtain all possible overlaps in the memory region
755 const std::size_t candidate_size = params.GetGuestSizeInBytes(); 750 const std::size_t candidate_size = params.GetGuestSizeInBytes();
756 auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; 751 auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};
757 752
758 // If none are found, we are done. we just load the surface and create it. 753 // If none are found, we are done. we just load the surface and create it.
759 if (overlaps.empty()) { 754 if (overlaps.empty()) {
@@ -775,7 +770,7 @@ private:
775 // Check if it's a 3D texture 770 // Check if it's a 3D texture
776 if (params.block_depth > 0) { 771 if (params.block_depth > 0) {
777 auto surface = 772 auto surface =
778 Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents); 773 Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
779 if (surface) { 774 if (surface) {
780 return *surface; 775 return *surface;
781 } 776 }
@@ -850,16 +845,16 @@ private:
850 * @param params The parameters on the candidate surface. 845 * @param params The parameters on the candidate surface.
851 **/ 846 **/
852 Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { 847 Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
853 const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; 848 const std::optional<VAddr> cpu_addr =
854 const auto cache_addr{ToCacheAddr(host_ptr)}; 849 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
855 850
856 if (!cache_addr) { 851 if (!cpu_addr) {
857 Deduction result{}; 852 Deduction result{};
858 result.type = DeductionType::DeductionFailed; 853 result.type = DeductionType::DeductionFailed;
859 return result; 854 return result;
860 } 855 }
861 856
862 if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { 857 if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) {
863 TSurface& current_surface = iter->second; 858 TSurface& current_surface = iter->second;
864 const auto topological_result = current_surface->MatchesTopology(params); 859 const auto topological_result = current_surface->MatchesTopology(params);
865 if (topological_result != MatchTopologyResult::FullMatch) { 860 if (topological_result != MatchTopologyResult::FullMatch) {
@@ -878,7 +873,7 @@ private:
878 } 873 }
879 874
880 const std::size_t candidate_size = params.GetGuestSizeInBytes(); 875 const std::size_t candidate_size = params.GetGuestSizeInBytes();
881 auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; 876 auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)};
882 877
883 if (overlaps.empty()) { 878 if (overlaps.empty()) {
884 Deduction result{}; 879 Deduction result{};
@@ -1022,10 +1017,10 @@ private:
1022 } 1017 }
1023 1018
1024 void RegisterInnerCache(TSurface& surface) { 1019 void RegisterInnerCache(TSurface& surface) {
1025 const CacheAddr cache_addr = surface->GetCacheAddr(); 1020 const VAddr cpu_addr = surface->GetCpuAddr();
1026 CacheAddr start = cache_addr >> registry_page_bits; 1021 VAddr start = cpu_addr >> registry_page_bits;
1027 const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; 1022 const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
1028 l1_cache[cache_addr] = surface; 1023 l1_cache[cpu_addr] = surface;
1029 while (start <= end) { 1024 while (start <= end) {
1030 registry[start].push_back(surface); 1025 registry[start].push_back(surface);
1031 start++; 1026 start++;
@@ -1033,10 +1028,10 @@ private:
1033 } 1028 }
1034 1029
1035 void UnregisterInnerCache(TSurface& surface) { 1030 void UnregisterInnerCache(TSurface& surface) {
1036 const CacheAddr cache_addr = surface->GetCacheAddr(); 1031 const VAddr cpu_addr = surface->GetCpuAddr();
1037 CacheAddr start = cache_addr >> registry_page_bits; 1032 VAddr start = cpu_addr >> registry_page_bits;
1038 const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; 1033 const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
1039 l1_cache.erase(cache_addr); 1034 l1_cache.erase(cpu_addr);
1040 while (start <= end) { 1035 while (start <= end) {
1041 auto& reg{registry[start]}; 1036 auto& reg{registry[start]};
1042 reg.erase(std::find(reg.begin(), reg.end(), surface)); 1037 reg.erase(std::find(reg.begin(), reg.end(), surface));
@@ -1044,18 +1039,18 @@ private:
1044 } 1039 }
1045 } 1040 }
1046 1041
1047 std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) { 1042 std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
1048 if (size == 0) { 1043 if (size == 0) {
1049 return {}; 1044 return {};
1050 } 1045 }
1051 const CacheAddr cache_addr_end = cache_addr + size; 1046 const VAddr cpu_addr_end = cpu_addr + size;
1052 CacheAddr start = cache_addr >> registry_page_bits; 1047 VAddr start = cpu_addr >> registry_page_bits;
1053 const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; 1048 const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
1054 std::vector<TSurface> surfaces; 1049 std::vector<TSurface> surfaces;
1055 while (start <= end) { 1050 while (start <= end) {
1056 std::vector<TSurface>& list = registry[start]; 1051 std::vector<TSurface>& list = registry[start];
1057 for (auto& surface : list) { 1052 for (auto& surface : list) {
1058 if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) { 1053 if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) {
1059 surface->MarkAsPicked(true); 1054 surface->MarkAsPicked(true);
1060 surfaces.push_back(surface); 1055 surfaces.push_back(surface);
1061 } 1056 }
@@ -1144,14 +1139,14 @@ private:
1144 // large in size. 1139 // large in size.
1145 static constexpr u64 registry_page_bits{20}; 1140 static constexpr u64 registry_page_bits{20};
1146 static constexpr u64 registry_page_size{1 << registry_page_bits}; 1141 static constexpr u64 registry_page_size{1 << registry_page_bits};
1147 std::unordered_map<CacheAddr, std::vector<TSurface>> registry; 1142 std::unordered_map<VAddr, std::vector<TSurface>> registry;
1148 1143
1149 static constexpr u32 DEPTH_RT = 8; 1144 static constexpr u32 DEPTH_RT = 8;
1150 static constexpr u32 NO_RT = 0xFFFFFFFF; 1145 static constexpr u32 NO_RT = 0xFFFFFFFF;
1151 1146
1152 // The L1 Cache is used for fast texture lookup before checking the overlaps 1147 // The L1 Cache is used for fast texture lookup before checking the overlaps
1153 // This avoids calculating size and other stuffs. 1148 // This avoids calculating size and other stuffs.
1154 std::unordered_map<CacheAddr, TSurface> l1_cache; 1149 std::unordered_map<VAddr, TSurface> l1_cache;
1155 1150
1156 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have 1151 /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
1157 /// previously been used. This is to prevent surfaces from being constantly created and 1152 /// previously been used. This is to prevent surfaces from being constantly created and
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
new file mode 100644
index 000000000..d1939d744
--- /dev/null
+++ b/src/video_core/textures/texture.cpp
@@ -0,0 +1,80 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7
8#include "core/settings.h"
9#include "video_core/textures/texture.h"
10
11namespace Tegra::Texture {
12
13namespace {
14
// Lookup table with 256 float entries (valid indices 0-255), with values ranging from 0.0 to 1.0.
// TSCEntry::GetBorderColor() indexes it with the srgb_border_color_{r,g,b} fields to obtain
// the float border color components when srgb_conversion is set.
// NOTE(review): presumably an sRGB decode curve; the values do not obviously match the
// textbook sRGB transfer function, so confirm where they were derived from.
15constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {
16 0.000000f, 0.000000f, 0.000000f, 0.000012f, 0.000021f, 0.000033f, 0.000046f, 0.000062f,
17 0.000081f, 0.000102f, 0.000125f, 0.000151f, 0.000181f, 0.000214f, 0.000251f, 0.000293f,
18 0.000338f, 0.000388f, 0.000443f, 0.000503f, 0.000568f, 0.000639f, 0.000715f, 0.000798f,
19 0.000887f, 0.000983f, 0.001085f, 0.001195f, 0.001312f, 0.001437f, 0.001569f, 0.001710f,
20 0.001860f, 0.002019f, 0.002186f, 0.002364f, 0.002551f, 0.002748f, 0.002955f, 0.003174f,
21 0.003403f, 0.003643f, 0.003896f, 0.004160f, 0.004436f, 0.004725f, 0.005028f, 0.005343f,
22 0.005672f, 0.006015f, 0.006372f, 0.006744f, 0.007130f, 0.007533f, 0.007950f, 0.008384f,
23 0.008834f, 0.009301f, 0.009785f, 0.010286f, 0.010805f, 0.011342f, 0.011898f, 0.012472f,
24 0.013066f, 0.013680f, 0.014313f, 0.014967f, 0.015641f, 0.016337f, 0.017054f, 0.017793f,
25 0.018554f, 0.019337f, 0.020144f, 0.020974f, 0.021828f, 0.022706f, 0.023609f, 0.024536f,
26 0.025489f, 0.026468f, 0.027473f, 0.028504f, 0.029563f, 0.030649f, 0.031762f, 0.032904f,
27 0.034074f, 0.035274f, 0.036503f, 0.037762f, 0.039050f, 0.040370f, 0.041721f, 0.043103f,
28 0.044518f, 0.045964f, 0.047444f, 0.048956f, 0.050503f, 0.052083f, 0.053699f, 0.055349f,
29 0.057034f, 0.058755f, 0.060513f, 0.062307f, 0.064139f, 0.066008f, 0.067915f, 0.069861f,
30 0.071845f, 0.073869f, 0.075933f, 0.078037f, 0.080182f, 0.082369f, 0.084597f, 0.086867f,
31 0.089180f, 0.091535f, 0.093935f, 0.096378f, 0.098866f, 0.101398f, 0.103977f, 0.106601f,
32 0.109271f, 0.111988f, 0.114753f, 0.117565f, 0.120426f, 0.123335f, 0.126293f, 0.129301f,
33 0.132360f, 0.135469f, 0.138629f, 0.141841f, 0.145105f, 0.148421f, 0.151791f, 0.155214f,
34 0.158691f, 0.162224f, 0.165810f, 0.169453f, 0.173152f, 0.176907f, 0.180720f, 0.184589f,
35 0.188517f, 0.192504f, 0.196549f, 0.200655f, 0.204820f, 0.209046f, 0.213334f, 0.217682f,
36 0.222093f, 0.226567f, 0.231104f, 0.235704f, 0.240369f, 0.245099f, 0.249894f, 0.254754f,
37 0.259681f, 0.264674f, 0.269736f, 0.274864f, 0.280062f, 0.285328f, 0.290664f, 0.296070f,
38 0.301546f, 0.307094f, 0.312713f, 0.318404f, 0.324168f, 0.330006f, 0.335916f, 0.341902f,
39 0.347962f, 0.354097f, 0.360309f, 0.366597f, 0.372961f, 0.379403f, 0.385924f, 0.392524f,
40 0.399202f, 0.405960f, 0.412798f, 0.419718f, 0.426719f, 0.433802f, 0.440967f, 0.448216f,
41 0.455548f, 0.462965f, 0.470465f, 0.478052f, 0.485725f, 0.493484f, 0.501329f, 0.509263f,
42 0.517285f, 0.525396f, 0.533595f, 0.541885f, 0.550265f, 0.558736f, 0.567299f, 0.575954f,
43 0.584702f, 0.593542f, 0.602477f, 0.611507f, 0.620632f, 0.629852f, 0.639168f, 0.648581f,
44 0.658092f, 0.667700f, 0.677408f, 0.687214f, 0.697120f, 0.707127f, 0.717234f, 0.727443f,
45 0.737753f, 0.748167f, 0.758685f, 0.769305f, 0.780031f, 0.790861f, 0.801798f, 0.812839f,
46 0.823989f, 0.835246f, 0.846611f, 0.858085f, 0.869668f, 0.881360f, 0.893164f, 0.905078f,
47 0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f,
48};
49
/// Maps the Settings::values.max_anisotropy setting to an anisotropy factor.
/// @return 1 for Anisotropy::Default or any unrecognized value, otherwise 2, 4, 8 or 16 for
///         Filter2x, Filter4x, Filter8x and Filter16x respectively.
/// GetMaxAnisotropy() uses the result as a lower bound for the per-sampler value.
50unsigned SettingsMinimumAnisotropy() noexcept {
51 switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) {
// Unrecognized setting values share the Default case below.
52 default:
53 case Anisotropy::Default:
54 return 1U;
55 case Anisotropy::Filter2x:
56 return 2U;
57 case Anisotropy::Filter4x:
58 return 4U;
59 case Anisotropy::Filter8x:
60 return 8U;
61 case Anisotropy::Filter16x:
62 return 16U;
63 }
64}
65
66} // Anonymous namespace
67
/// Returns the border color of this sampler entry as four floats.
/// When srgb_conversion is not set, border_color is returned unchanged. Otherwise the first
/// three components are looked up in SRGB_CONVERSION_LUT using srgb_border_color_r/g/b, and
/// the fourth component is taken from border_color[3] without conversion.
68std::array<float, 4> TSCEntry::GetBorderColor() const noexcept {
69 if (!srgb_conversion) {
70 return border_color;
71 }
// NOTE(review): assumes each srgb_border_color_* field is at most 255 (the table has 256
// entries) - confirm against the field widths declared in texture.h.
72 return {SRGB_CONVERSION_LUT[srgb_border_color_r], SRGB_CONVERSION_LUT[srgb_border_color_g],
73 SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
74}
75
/// Returns the anisotropy to use for this sampler entry, as a float.
/// The entry's own value is 1 << max_anisotropy; the result is the larger of that and
/// SettingsMinimumAnisotropy(), so the user setting acts as a lower bound.
76float TSCEntry::GetMaxAnisotropy() const noexcept {
77 return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy()));
78}
79
80} // namespace Tegra::Texture
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 7edc4abe1..59b8a5e66 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -8,7 +8,6 @@
8#include "common/assert.h" 8#include "common/assert.h"
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "core/settings.h"
12 11
13namespace Tegra::Texture { 12namespace Tegra::Texture {
14 13
@@ -336,24 +335,9 @@ struct TSCEntry {
336 std::array<u8, 0x20> raw; 335 std::array<u8, 0x20> raw;
337 }; 336 };
338 337
339 float GetMaxAnisotropy() const { 338 std::array<float, 4> GetBorderColor() const noexcept;
340 const u32 min_value = [] { 339
341 switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) { 340 float GetMaxAnisotropy() const noexcept;
342 default:
343 case Anisotropy::Default:
344 return 1U;
345 case Anisotropy::Filter2x:
346 return 2U;
347 case Anisotropy::Filter4x:
348 return 4U;
349 case Anisotropy::Filter8x:
350 return 8U;
351 case Anisotropy::Filter16x:
352 return 16U;
353 }
354 }();
355 return static_cast<float>(std::max(1U << max_anisotropy, min_value));
356 }
357 341
358 float GetMinLod() const { 342 float GetMinLod() const {
359 return static_cast<float>(min_lod_clamp) / 256.0f; 343 return static_cast<float>(min_lod_clamp) / 256.0f;
@@ -368,15 +352,6 @@ struct TSCEntry {
368 constexpr u32 mask = 1U << (13 - 1); 352 constexpr u32 mask = 1U << (13 - 1);
369 return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; 353 return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;
370 } 354 }
371
372 std::array<float, 4> GetBorderColor() const {
373 if (srgb_conversion) {
374 return {static_cast<float>(srgb_border_color_r) / 255.0f,
375 static_cast<float>(srgb_border_color_g) / 255.0f,
376 static_cast<float>(srgb_border_color_b) / 255.0f, border_color[3]};
377 }
378 return border_color;
379 }
380}; 355};
381static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); 356static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
382 357
diff --git a/src/yuzu/about_dialog.cpp b/src/yuzu/about_dialog.cpp
index d39b3f07a..695b2ef5f 100644
--- a/src/yuzu/about_dialog.cpp
+++ b/src/yuzu/about_dialog.cpp
@@ -3,15 +3,22 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <QIcon> 5#include <QIcon>
6#include <fmt/format.h>
6#include "common/scm_rev.h" 7#include "common/scm_rev.h"
7#include "ui_aboutdialog.h" 8#include "ui_aboutdialog.h"
8#include "yuzu/about_dialog.h" 9#include "yuzu/about_dialog.h"
9 10
10AboutDialog::AboutDialog(QWidget* parent) : QDialog(parent), ui(new Ui::AboutDialog) { 11AboutDialog::AboutDialog(QWidget* parent) : QDialog(parent), ui(new Ui::AboutDialog) {
12 const auto build_id = std::string(Common::g_build_id);
13 const auto fmt = std::string(Common::g_title_bar_format_idle);
14 const auto yuzu_build_version =
15 fmt::format(fmt.empty() ? "yuzu Development Build" : fmt, std::string{}, std::string{},
16 std::string{}, std::string{}, std::string{}, build_id);
17
11 ui->setupUi(this); 18 ui->setupUi(this);
12 ui->labelLogo->setPixmap(QIcon::fromTheme(QStringLiteral("yuzu")).pixmap(200)); 19 ui->labelLogo->setPixmap(QIcon::fromTheme(QStringLiteral("yuzu")).pixmap(200));
13 ui->labelBuildInfo->setText(ui->labelBuildInfo->text().arg( 20 ui->labelBuildInfo->setText(ui->labelBuildInfo->text().arg(
14 QString::fromUtf8(Common::g_build_fullname), QString::fromUtf8(Common::g_scm_branch), 21 QString::fromStdString(yuzu_build_version), QString::fromUtf8(Common::g_scm_branch),
15 QString::fromUtf8(Common::g_scm_desc), QString::fromUtf8(Common::g_build_date).left(10))); 22 QString::fromUtf8(Common::g_scm_desc), QString::fromUtf8(Common::g_build_date).left(10)));
16} 23}
17 24
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index eaded2640..7b211bd32 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -224,7 +224,6 @@ public:
224 } 224 }
225 225
226 context->MakeCurrent(); 226 context->MakeCurrent();
227 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
228 if (Core::System::GetInstance().Renderer().TryPresent(100)) { 227 if (Core::System::GetInstance().Renderer().TryPresent(100)) {
229 context->SwapBuffers(); 228 context->SwapBuffers();
230 glFinish(); 229 glFinish();
diff --git a/src/yuzu/configuration/configure_input_player.ui b/src/yuzu/configuration/configure_input_player.ui
index c3a1b68f0..4b37746a1 100644
--- a/src/yuzu/configuration/configure_input_player.ui
+++ b/src/yuzu/configuration/configure_input_player.ui
@@ -927,7 +927,7 @@
927 </item> 927 </item>
928 </layout> 928 </layout>
929 </item> 929 </item>
930 <item row="2" column="0"> 930 <item row="0" column="2">
931 <layout class="QVBoxLayout" name="buttonShoulderButtonsSLVerticalLayout"> 931 <layout class="QVBoxLayout" name="buttonShoulderButtonsSLVerticalLayout">
932 <item> 932 <item>
933 <layout class="QHBoxLayout" name="buttonShoulderButtonsSLHorizontalLayout"> 933 <layout class="QHBoxLayout" name="buttonShoulderButtonsSLHorizontalLayout">
@@ -949,7 +949,7 @@
949 </item> 949 </item>
950 </layout> 950 </layout>
951 </item> 951 </item>
952 <item row="2" column="1"> 952 <item row="1" column="2">
953 <layout class="QVBoxLayout" name="buttonShoulderButtonsSRVerticalLayout"> 953 <layout class="QVBoxLayout" name="buttonShoulderButtonsSRVerticalLayout">
954 <item> 954 <item>
955 <layout class="QHBoxLayout" name="buttonShoulderButtonsSRHorizontalLayout"> 955 <layout class="QHBoxLayout" name="buttonShoulderButtonsSRHorizontalLayout">
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 940f24dc8..1717e06f9 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -205,7 +205,13 @@ GMainWindow::GMainWindow()
205 ConnectMenuEvents(); 205 ConnectMenuEvents();
206 ConnectWidgetEvents(); 206 ConnectWidgetEvents();
207 207
208 LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch, 208 const auto build_id = std::string(Common::g_build_id);
209 const auto fmt = std::string(Common::g_title_bar_format_idle);
210 const auto yuzu_build_version =
211 fmt::format(fmt.empty() ? "yuzu Development Build" : fmt, std::string{}, std::string{},
212 std::string{}, std::string{}, std::string{}, build_id);
213
214 LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", yuzu_build_version, Common::g_scm_branch,
209 Common::g_scm_desc); 215 Common::g_scm_desc);
210#ifdef ARCHITECTURE_x86_64 216#ifdef ARCHITECTURE_x86_64
211 LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string); 217 LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string);