summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common/CMakeLists.txt93
-rw-r--r--src/common/common_paths.h1
-rw-r--r--src/common/file_util.cpp1
-rw-r--r--src/common/file_util.h1
-rw-r--r--src/common/scm_rev.cpp.in2
-rw-r--r--src/common/scm_rev.h1
-rw-r--r--src/core/core.cpp3
-rw-r--r--src/core/gdbstub/gdbstub.cpp14
-rw-r--r--src/core/hle/service/am/am.cpp7
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp92
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.h25
-rw-r--r--src/core/hle/service/vi/vi.cpp62
-rw-r--r--src/core/settings.h1
-rw-r--r--src/core/telemetry_session.cpp2
-rw-r--r--src/video_core/CMakeLists.txt4
-rw-r--r--src/video_core/dma_pusher.cpp6
-rw-r--r--src/video_core/engines/fermi_2d.cpp16
-rw-r--r--src/video_core/engines/kepler_memory.cpp11
-rw-r--r--src/video_core/engines/maxwell_3d.cpp85
-rw-r--r--src/video_core/engines/maxwell_dma.cpp22
-rw-r--r--src/video_core/engines/shader_bytecode.h10
-rw-r--r--src/video_core/memory_manager.cpp3
-rw-r--r--src/video_core/rasterizer_interface.h5
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_primitive_assembler.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp32
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h10
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp242
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h7
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp9
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h8
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp484
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h89
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h56
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp656
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h245
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.h5
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp32
-rw-r--r--src/video_core/renderer_opengl/gl_state.h12
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp69
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h8
-rw-r--r--src/video_core/shader/decode/memory.cpp70
-rw-r--r--src/video_core/shader/shader_ir.h9
-rw-r--r--src/video_core/textures/texture.h3
-rw-r--r--src/video_core/video_core.cpp5
-rw-r--r--src/video_core/video_core.h7
-rw-r--r--src/yuzu/bootmanager.cpp9
-rw-r--r--src/yuzu/bootmanager.h8
-rw-r--r--src/yuzu/configuration/config.cpp3
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp22
-rw-r--r--src/yuzu/configuration/configure_graphics.h2
-rw-r--r--src/yuzu/configuration/configure_graphics.ui7
-rw-r--r--src/yuzu/loading_screen.cpp4
-rw-r--r--src/yuzu/loading_screen.ui5
-rw-r--r--src/yuzu/main.cpp19
-rw-r--r--src/yuzu_cmd/config.cpp2
-rw-r--r--src/yuzu_cmd/default_ini.h4
-rw-r--r--src/yuzu_cmd/yuzu.cpp3
60 files changed, 1999 insertions, 632 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 845626fc5..bdd885273 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -1,42 +1,69 @@
1# Generate cpp with Git revision from template 1# Add a custom command to generate a new shader_cache_version hash when any of the following files change
2# Also if this is a CI build, add the build name (ie: Nightly, Canary) to the scm_rev file as well 2# NOTE: This is an approximation of what files affect shader generation, its possible something else
3set(REPO_NAME "") 3# could affect the result, but much more unlikely than the following files. Keeping a list of files
4set(BUILD_VERSION "0") 4# like this allows for much better caching since it doesn't force the user to recompile binary shaders every update
5if ($ENV{CI}) 5set(VIDEO_CORE "${CMAKE_SOURCE_DIR}/src/video_core")
6 if ($ENV{TRAVIS}) 6if (DEFINED ENV{CI})
7 if (DEFINED ENV{TRAVIS})
7 set(BUILD_REPOSITORY $ENV{TRAVIS_REPO_SLUG}) 8 set(BUILD_REPOSITORY $ENV{TRAVIS_REPO_SLUG})
8 set(BUILD_TAG $ENV{TRAVIS_TAG}) 9 set(BUILD_TAG $ENV{TRAVIS_TAG})
9 elseif($ENV{APPVEYOR}) 10 elseif(DEFINED ENV{APPVEYOR})
10 set(BUILD_REPOSITORY $ENV{APPVEYOR_REPO_NAME}) 11 set(BUILD_REPOSITORY $ENV{APPVEYOR_REPO_NAME})
11 set(BUILD_TAG $ENV{APPVEYOR_REPO_TAG_NAME}) 12 set(BUILD_TAG $ENV{APPVEYOR_REPO_TAG_NAME})
12 endif() 13 endif()
13 # regex capture the string nightly or canary into CMAKE_MATCH_1
14 string(REGEX MATCH "yuzu-emu/yuzu-?(.*)" OUTVAR ${BUILD_REPOSITORY})
15 if (${CMAKE_MATCH_COUNT} GREATER 0)
16 # capitalize the first letter of each word in the repo name.
17 string(REPLACE "-" ";" REPO_NAME_LIST ${CMAKE_MATCH_1})
18 foreach(WORD ${REPO_NAME_LIST})
19 string(SUBSTRING ${WORD} 0 1 FIRST_LETTER)
20 string(SUBSTRING ${WORD} 1 -1 REMAINDER)
21 string(TOUPPER ${FIRST_LETTER} FIRST_LETTER)
22 set(REPO_NAME "${REPO_NAME}${FIRST_LETTER}${REMAINDER}")
23 endforeach()
24 if (BUILD_TAG)
25 string(REGEX MATCH "${CMAKE_MATCH_1}-([0-9]+)" OUTVAR ${BUILD_TAG})
26 if (${CMAKE_MATCH_COUNT} GREATER 0)
27 set(BUILD_VERSION ${CMAKE_MATCH_1})
28 endif()
29 if (BUILD_VERSION)
30 # This leaves a trailing space on the last word, but we actually want that
31 # because of how it's styled in the title bar.
32 set(BUILD_FULLNAME "${REPO_NAME} ${BUILD_VERSION} ")
33 else()
34 set(BUILD_FULLNAME "")
35 endif()
36 endif()
37 endif()
38endif() 14endif()
39configure_file("${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in" "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp" @ONLY) 15add_custom_command(OUTPUT scm_rev.cpp
16 COMMAND ${CMAKE_COMMAND}
17 -DSRC_DIR="${CMAKE_SOURCE_DIR}"
18 -DBUILD_REPOSITORY="${BUILD_REPOSITORY}"
19 -DBUILD_TAG="${BUILD_TAG}"
20 -P "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake"
21 DEPENDS
22 # WARNING! It was too much work to try and make a common location for this list,
23 # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
24 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
25 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
26 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
27 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
28 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
29 "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
30 "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.cpp"
31 "${VIDEO_CORE}/renderer_opengl/gl_shader_gen.h"
32 "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
33 "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
34 "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
35 "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
36 "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
37 "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
38 "${VIDEO_CORE}/shader/decode/bfe.cpp"
39 "${VIDEO_CORE}/shader/decode/bfi.cpp"
40 "${VIDEO_CORE}/shader/decode/conversion.cpp"
41 "${VIDEO_CORE}/shader/decode/ffma.cpp"
42 "${VIDEO_CORE}/shader/decode/float_set.cpp"
43 "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
44 "${VIDEO_CORE}/shader/decode/half_set.cpp"
45 "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
46 "${VIDEO_CORE}/shader/decode/hfma2.cpp"
47 "${VIDEO_CORE}/shader/decode/integer_set.cpp"
48 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
49 "${VIDEO_CORE}/shader/decode/memory.cpp"
50 "${VIDEO_CORE}/shader/decode/other.cpp"
51 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
52 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
53 "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
54 "${VIDEO_CORE}/shader/decode/shift.cpp"
55 "${VIDEO_CORE}/shader/decode/video.cpp"
56 "${VIDEO_CORE}/shader/decode/xmad.cpp"
57 "${VIDEO_CORE}/shader/decode.cpp"
58 "${VIDEO_CORE}/shader/shader_ir.cpp"
59 "${VIDEO_CORE}/shader/shader_ir.h"
60 "${VIDEO_CORE}/shader/track.cpp"
61 # and also check that the scm_rev files haven't changed
62 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
63 "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"
64 # technically we should regenerate if the git version changed, but its not worth the effort imo
65 "${CMAKE_SOURCE_DIR}/CMakeModules/GenerateSCMRev.cmake"
66)
40 67
41add_library(common STATIC 68add_library(common STATIC
42 alignment.h 69 alignment.h
diff --git a/src/common/common_paths.h b/src/common/common_paths.h
index 4f88de768..076752d3b 100644
--- a/src/common/common_paths.h
+++ b/src/common/common_paths.h
@@ -35,6 +35,7 @@
35#define KEYS_DIR "keys" 35#define KEYS_DIR "keys"
36#define LOAD_DIR "load" 36#define LOAD_DIR "load"
37#define DUMP_DIR "dump" 37#define DUMP_DIR "dump"
38#define SHADER_DIR "shader"
38#define LOG_DIR "log" 39#define LOG_DIR "log"
39 40
40// Filenames 41// Filenames
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp
index b52492da6..aecb66c32 100644
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -710,6 +710,7 @@ const std::string& GetUserPath(UserPath path, const std::string& new_path) {
710 paths.emplace(UserPath::NANDDir, user_path + NAND_DIR DIR_SEP); 710 paths.emplace(UserPath::NANDDir, user_path + NAND_DIR DIR_SEP);
711 paths.emplace(UserPath::LoadDir, user_path + LOAD_DIR DIR_SEP); 711 paths.emplace(UserPath::LoadDir, user_path + LOAD_DIR DIR_SEP);
712 paths.emplace(UserPath::DumpDir, user_path + DUMP_DIR DIR_SEP); 712 paths.emplace(UserPath::DumpDir, user_path + DUMP_DIR DIR_SEP);
713 paths.emplace(UserPath::ShaderDir, user_path + SHADER_DIR DIR_SEP);
713 paths.emplace(UserPath::SysDataDir, user_path + SYSDATA_DIR DIR_SEP); 714 paths.emplace(UserPath::SysDataDir, user_path + SYSDATA_DIR DIR_SEP);
714 paths.emplace(UserPath::KeysDir, user_path + KEYS_DIR DIR_SEP); 715 paths.emplace(UserPath::KeysDir, user_path + KEYS_DIR DIR_SEP);
715 // TODO: Put the logs in a better location for each OS 716 // TODO: Put the logs in a better location for each OS
diff --git a/src/common/file_util.h b/src/common/file_util.h
index 571503d2a..38cc7f059 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -31,6 +31,7 @@ enum class UserPath {
31 SDMCDir, 31 SDMCDir,
32 LoadDir, 32 LoadDir,
33 DumpDir, 33 DumpDir,
34 ShaderDir,
34 SysDataDir, 35 SysDataDir,
35 UserDir, 36 UserDir,
36}; 37};
diff --git a/src/common/scm_rev.cpp.in b/src/common/scm_rev.cpp.in
index 2b1727769..d69038f65 100644
--- a/src/common/scm_rev.cpp.in
+++ b/src/common/scm_rev.cpp.in
@@ -11,6 +11,7 @@
11#define BUILD_DATE "@BUILD_DATE@" 11#define BUILD_DATE "@BUILD_DATE@"
12#define BUILD_FULLNAME "@BUILD_FULLNAME@" 12#define BUILD_FULLNAME "@BUILD_FULLNAME@"
13#define BUILD_VERSION "@BUILD_VERSION@" 13#define BUILD_VERSION "@BUILD_VERSION@"
14#define SHADER_CACHE_VERSION "@SHADER_CACHE_VERSION@"
14 15
15namespace Common { 16namespace Common {
16 17
@@ -21,6 +22,7 @@ const char g_build_name[] = BUILD_NAME;
21const char g_build_date[] = BUILD_DATE; 22const char g_build_date[] = BUILD_DATE;
22const char g_build_fullname[] = BUILD_FULLNAME; 23const char g_build_fullname[] = BUILD_FULLNAME;
23const char g_build_version[] = BUILD_VERSION; 24const char g_build_version[] = BUILD_VERSION;
25const char g_shader_cache_version[] = SHADER_CACHE_VERSION;
24 26
25} // namespace 27} // namespace
26 28
diff --git a/src/common/scm_rev.h b/src/common/scm_rev.h
index af9a9daed..666bf0367 100644
--- a/src/common/scm_rev.h
+++ b/src/common/scm_rev.h
@@ -13,5 +13,6 @@ extern const char g_build_name[];
13extern const char g_build_date[]; 13extern const char g_build_date[];
14extern const char g_build_fullname[]; 14extern const char g_build_fullname[];
15extern const char g_build_version[]; 15extern const char g_build_version[];
16extern const char g_shader_cache_version[];
16 17
17} // namespace Common 18} // namespace Common
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 572814e4b..1dd576c26 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -123,7 +123,7 @@ struct System::Impl {
123 Service::Init(service_manager, *virtual_filesystem); 123 Service::Init(service_manager, *virtual_filesystem);
124 GDBStub::Init(); 124 GDBStub::Init();
125 125
126 renderer = VideoCore::CreateRenderer(emu_window); 126 renderer = VideoCore::CreateRenderer(emu_window, system);
127 if (!renderer->Init()) { 127 if (!renderer->Init()) {
128 return ResultStatus::ErrorVideoCore; 128 return ResultStatus::ErrorVideoCore;
129 } 129 }
@@ -175,6 +175,7 @@ struct System::Impl {
175 return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) + 175 return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) +
176 static_cast<u32>(load_result)); 176 static_cast<u32>(load_result));
177 } 177 }
178
178 status = ResultStatus::Success; 179 status = ResultStatus::Success;
179 return status; 180 return status;
180 } 181 }
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index a1cad4fcb..dafb32aae 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -507,8 +507,11 @@ static void RemoveBreakpoint(BreakpointType type, VAddr addr) {
507 507
508 LOG_DEBUG(Debug_GDBStub, "gdb: removed a breakpoint: {:016X} bytes at {:016X} of type {}", 508 LOG_DEBUG(Debug_GDBStub, "gdb: removed a breakpoint: {:016X} bytes at {:016X} of type {}",
509 bp->second.len, bp->second.addr, static_cast<int>(type)); 509 bp->second.len, bp->second.addr, static_cast<int>(type));
510 Memory::WriteBlock(bp->second.addr, bp->second.inst.data(), bp->second.inst.size()); 510
511 Core::System::GetInstance().InvalidateCpuInstructionCaches(); 511 if (type == BreakpointType::Execute) {
512 Memory::WriteBlock(bp->second.addr, bp->second.inst.data(), bp->second.inst.size());
513 Core::System::GetInstance().InvalidateCpuInstructionCaches();
514 }
512 p.erase(addr); 515 p.erase(addr);
513} 516}
514 517
@@ -1057,9 +1060,12 @@ static bool CommitBreakpoint(BreakpointType type, VAddr addr, u64 len) {
1057 breakpoint.addr = addr; 1060 breakpoint.addr = addr;
1058 breakpoint.len = len; 1061 breakpoint.len = len;
1059 Memory::ReadBlock(addr, breakpoint.inst.data(), breakpoint.inst.size()); 1062 Memory::ReadBlock(addr, breakpoint.inst.data(), breakpoint.inst.size());
1063
1060 static constexpr std::array<u8, 4> btrap{0x00, 0x7d, 0x20, 0xd4}; 1064 static constexpr std::array<u8, 4> btrap{0x00, 0x7d, 0x20, 0xd4};
1061 Memory::WriteBlock(addr, btrap.data(), btrap.size()); 1065 if (type == BreakpointType::Execute) {
1062 Core::System::GetInstance().InvalidateCpuInstructionCaches(); 1066 Memory::WriteBlock(addr, btrap.data(), btrap.size());
1067 Core::System::GetInstance().InvalidateCpuInstructionCaches();
1068 }
1063 p.insert({addr, breakpoint}); 1069 p.insert({addr, breakpoint});
1064 1070
1065 LOG_DEBUG(Debug_GDBStub, "gdb: added {} breakpoint: {:016X} bytes at {:016X}", 1071 LOG_DEBUG(Debug_GDBStub, "gdb: added {} breakpoint: {:016X} bytes at {:016X}",
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index d1cbe0e44..3f009d2b7 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -322,14 +322,15 @@ void ISelfController::SetScreenShotImageOrientation(Kernel::HLERequestContext& c
322 322
323void ISelfController::CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx) { 323void ISelfController::CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx) {
324 LOG_WARNING(Service_AM, "(STUBBED) called"); 324 LOG_WARNING(Service_AM, "(STUBBED) called");
325
325 // TODO(Subv): Find out how AM determines the display to use, for now just 326 // TODO(Subv): Find out how AM determines the display to use, for now just
326 // create the layer in the Default display. 327 // create the layer in the Default display.
327 u64 display_id = nvflinger->OpenDisplay("Default"); 328 const auto display_id = nvflinger->OpenDisplay("Default");
328 u64 layer_id = nvflinger->CreateLayer(display_id); 329 const auto layer_id = nvflinger->CreateLayer(*display_id);
329 330
330 IPC::ResponseBuilder rb{ctx, 4}; 331 IPC::ResponseBuilder rb{ctx, 4};
331 rb.Push(RESULT_SUCCESS); 332 rb.Push(RESULT_SUCCESS);
332 rb.Push(layer_id); 333 rb.Push(*layer_id);
333} 334}
334 335
335void ISelfController::SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx) { 336void ISelfController::SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 8dfc0df03..cde06916d 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -46,7 +46,7 @@ void NVFlinger::SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance) {
46 nvdrv = std::move(instance); 46 nvdrv = std::move(instance);
47} 47}
48 48
49u64 NVFlinger::OpenDisplay(std::string_view name) { 49std::optional<u64> NVFlinger::OpenDisplay(std::string_view name) {
50 LOG_DEBUG(Service, "Opening \"{}\" display", name); 50 LOG_DEBUG(Service, "Opening \"{}\" display", name);
51 51
52 // TODO(Subv): Currently we only support the Default display. 52 // TODO(Subv): Currently we only support the Default display.
@@ -54,32 +54,48 @@ u64 NVFlinger::OpenDisplay(std::string_view name) {
54 54
55 const auto itr = std::find_if(displays.begin(), displays.end(), 55 const auto itr = std::find_if(displays.begin(), displays.end(),
56 [&](const Display& display) { return display.name == name; }); 56 [&](const Display& display) { return display.name == name; });
57 57 if (itr == displays.end()) {
58 ASSERT(itr != displays.end()); 58 return {};
59 }
59 60
60 return itr->id; 61 return itr->id;
61} 62}
62 63
63u64 NVFlinger::CreateLayer(u64 display_id) { 64std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
64 auto& display = FindDisplay(display_id); 65 auto* const display = FindDisplay(display_id);
66
67 if (display == nullptr) {
68 return {};
69 }
65 70
66 ASSERT_MSG(display.layers.empty(), "Only one layer is supported per display at the moment"); 71 ASSERT_MSG(display->layers.empty(), "Only one layer is supported per display at the moment");
67 72
68 const u64 layer_id = next_layer_id++; 73 const u64 layer_id = next_layer_id++;
69 const u32 buffer_queue_id = next_buffer_queue_id++; 74 const u32 buffer_queue_id = next_buffer_queue_id++;
70 auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id); 75 auto buffer_queue = std::make_shared<BufferQueue>(buffer_queue_id, layer_id);
71 display.layers.emplace_back(layer_id, buffer_queue); 76 display->layers.emplace_back(layer_id, buffer_queue);
72 buffer_queues.emplace_back(std::move(buffer_queue)); 77 buffer_queues.emplace_back(std::move(buffer_queue));
73 return layer_id; 78 return layer_id;
74} 79}
75 80
76u32 NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const { 81std::optional<u32> NVFlinger::FindBufferQueueId(u64 display_id, u64 layer_id) const {
77 const auto& layer = FindLayer(display_id, layer_id); 82 const auto* const layer = FindLayer(display_id, layer_id);
78 return layer.buffer_queue->GetId(); 83
84 if (layer == nullptr) {
85 return {};
86 }
87
88 return layer->buffer_queue->GetId();
79} 89}
80 90
81Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::GetVsyncEvent(u64 display_id) { 91Kernel::SharedPtr<Kernel::ReadableEvent> NVFlinger::FindVsyncEvent(u64 display_id) const {
82 return FindDisplay(display_id).vsync_event.readable; 92 auto* const display = FindDisplay(display_id);
93
94 if (display == nullptr) {
95 return nullptr;
96 }
97
98 return display->vsync_event.readable;
83} 99}
84 100
85std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const { 101std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const {
@@ -90,40 +106,60 @@ std::shared_ptr<BufferQueue> NVFlinger::FindBufferQueue(u32 id) const {
90 return *itr; 106 return *itr;
91} 107}
92 108
93Display& NVFlinger::FindDisplay(u64 display_id) { 109Display* NVFlinger::FindDisplay(u64 display_id) {
94 const auto itr = std::find_if(displays.begin(), displays.end(), 110 const auto itr = std::find_if(displays.begin(), displays.end(),
95 [&](const Display& display) { return display.id == display_id; }); 111 [&](const Display& display) { return display.id == display_id; });
96 112
97 ASSERT(itr != displays.end()); 113 if (itr == displays.end()) {
98 return *itr; 114 return nullptr;
115 }
116
117 return &*itr;
99} 118}
100 119
101const Display& NVFlinger::FindDisplay(u64 display_id) const { 120const Display* NVFlinger::FindDisplay(u64 display_id) const {
102 const auto itr = std::find_if(displays.begin(), displays.end(), 121 const auto itr = std::find_if(displays.begin(), displays.end(),
103 [&](const Display& display) { return display.id == display_id; }); 122 [&](const Display& display) { return display.id == display_id; });
104 123
105 ASSERT(itr != displays.end()); 124 if (itr == displays.end()) {
106 return *itr; 125 return nullptr;
126 }
127
128 return &*itr;
107} 129}
108 130
109Layer& NVFlinger::FindLayer(u64 display_id, u64 layer_id) { 131Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) {
110 auto& display = FindDisplay(display_id); 132 auto* const display = FindDisplay(display_id);
111 133
112 const auto itr = std::find_if(display.layers.begin(), display.layers.end(), 134 if (display == nullptr) {
135 return nullptr;
136 }
137
138 const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
113 [&](const Layer& layer) { return layer.id == layer_id; }); 139 [&](const Layer& layer) { return layer.id == layer_id; });
114 140
115 ASSERT(itr != display.layers.end()); 141 if (itr == display->layers.end()) {
116 return *itr; 142 return nullptr;
143 }
144
145 return &*itr;
117} 146}
118 147
119const Layer& NVFlinger::FindLayer(u64 display_id, u64 layer_id) const { 148const Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const {
120 const auto& display = FindDisplay(display_id); 149 const auto* const display = FindDisplay(display_id);
150
151 if (display == nullptr) {
152 return nullptr;
153 }
121 154
122 const auto itr = std::find_if(display.layers.begin(), display.layers.end(), 155 const auto itr = std::find_if(display->layers.begin(), display->layers.end(),
123 [&](const Layer& layer) { return layer.id == layer_id; }); 156 [&](const Layer& layer) { return layer.id == layer_id; });
124 157
125 ASSERT(itr != display.layers.end()); 158 if (itr == display->layers.end()) {
126 return *itr; 159 return nullptr;
160 }
161
162 return &*itr;
127} 163}
128 164
129void NVFlinger::Compose() { 165void NVFlinger::Compose() {
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index 83e974ed3..4c55e99f4 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -6,6 +6,7 @@
6 6
7#include <array> 7#include <array>
8#include <memory> 8#include <memory>
9#include <optional>
9#include <string> 10#include <string>
10#include <string_view> 11#include <string_view>
11#include <vector> 12#include <vector>
@@ -58,16 +59,24 @@ public:
58 void SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance); 59 void SetNVDrvInstance(std::shared_ptr<Nvidia::Module> instance);
59 60
60 /// Opens the specified display and returns the ID. 61 /// Opens the specified display and returns the ID.
61 u64 OpenDisplay(std::string_view name); 62 ///
63 /// If an invalid display name is provided, then an empty optional is returned.
64 std::optional<u64> OpenDisplay(std::string_view name);
62 65
63 /// Creates a layer on the specified display and returns the layer ID. 66 /// Creates a layer on the specified display and returns the layer ID.
64 u64 CreateLayer(u64 display_id); 67 ///
68 /// If an invalid display ID is specified, then an empty optional is returned.
69 std::optional<u64> CreateLayer(u64 display_id);
65 70
66 /// Finds the buffer queue ID of the specified layer in the specified display. 71 /// Finds the buffer queue ID of the specified layer in the specified display.
67 u32 FindBufferQueueId(u64 display_id, u64 layer_id) const; 72 ///
73 /// If an invalid display ID or layer ID is provided, then an empty optional is returned.
74 std::optional<u32> FindBufferQueueId(u64 display_id, u64 layer_id) const;
68 75
69 /// Gets the vsync event for the specified display. 76 /// Gets the vsync event for the specified display.
70 Kernel::SharedPtr<Kernel::ReadableEvent> GetVsyncEvent(u64 display_id); 77 ///
78 /// If an invalid display ID is provided, then nullptr is returned.
79 Kernel::SharedPtr<Kernel::ReadableEvent> FindVsyncEvent(u64 display_id) const;
71 80
72 /// Obtains a buffer queue identified by the ID. 81 /// Obtains a buffer queue identified by the ID.
73 std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const; 82 std::shared_ptr<BufferQueue> FindBufferQueue(u32 id) const;
@@ -78,16 +87,16 @@ public:
78 87
79private: 88private:
80 /// Finds the display identified by the specified ID. 89 /// Finds the display identified by the specified ID.
81 Display& FindDisplay(u64 display_id); 90 Display* FindDisplay(u64 display_id);
82 91
83 /// Finds the display identified by the specified ID. 92 /// Finds the display identified by the specified ID.
84 const Display& FindDisplay(u64 display_id) const; 93 const Display* FindDisplay(u64 display_id) const;
85 94
86 /// Finds the layer identified by the specified ID in the desired display. 95 /// Finds the layer identified by the specified ID in the desired display.
87 Layer& FindLayer(u64 display_id, u64 layer_id); 96 Layer* FindLayer(u64 display_id, u64 layer_id);
88 97
89 /// Finds the layer identified by the specified ID in the desired display. 98 /// Finds the layer identified by the specified ID in the desired display.
90 const Layer& FindLayer(u64 display_id, u64 layer_id) const; 99 const Layer* FindLayer(u64 display_id, u64 layer_id) const;
91 100
92 std::shared_ptr<Nvidia::Module> nvdrv; 101 std::shared_ptr<Nvidia::Module> nvdrv;
93 102
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index fe08c38f2..a317a2885 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -34,6 +34,7 @@ namespace Service::VI {
34 34
35constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1}; 35constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1};
36constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6}; 36constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6};
37constexpr ResultCode ERR_NOT_FOUND{ErrorModule::VI, 7};
37 38
38struct DisplayInfo { 39struct DisplayInfo {
39 /// The name of this particular display. 40 /// The name of this particular display.
@@ -838,11 +839,16 @@ private:
838 "(STUBBED) called. unknown=0x{:08X}, display=0x{:016X}, aruid=0x{:016X}", 839 "(STUBBED) called. unknown=0x{:08X}, display=0x{:016X}, aruid=0x{:016X}",
839 unknown, display, aruid); 840 unknown, display, aruid);
840 841
841 const u64 layer_id = nv_flinger->CreateLayer(display); 842 const auto layer_id = nv_flinger->CreateLayer(display);
843 if (!layer_id) {
844 IPC::ResponseBuilder rb{ctx, 2};
845 rb.Push(ERR_NOT_FOUND);
846 return;
847 }
842 848
843 IPC::ResponseBuilder rb{ctx, 4}; 849 IPC::ResponseBuilder rb{ctx, 4};
844 rb.Push(RESULT_SUCCESS); 850 rb.Push(RESULT_SUCCESS);
845 rb.Push(layer_id); 851 rb.Push(*layer_id);
846 } 852 }
847 853
848 void AddToLayerStack(Kernel::HLERequestContext& ctx) { 854 void AddToLayerStack(Kernel::HLERequestContext& ctx) {
@@ -950,9 +956,16 @@ private:
950 956
951 ASSERT_MSG(name == "Default", "Non-default displays aren't supported yet"); 957 ASSERT_MSG(name == "Default", "Non-default displays aren't supported yet");
952 958
959 const auto display_id = nv_flinger->OpenDisplay(name);
960 if (!display_id) {
961 IPC::ResponseBuilder rb{ctx, 2};
962 rb.Push(ERR_NOT_FOUND);
963 return;
964 }
965
953 IPC::ResponseBuilder rb{ctx, 4}; 966 IPC::ResponseBuilder rb{ctx, 4};
954 rb.Push(RESULT_SUCCESS); 967 rb.Push(RESULT_SUCCESS);
955 rb.Push<u64>(nv_flinger->OpenDisplay(name)); 968 rb.Push<u64>(*display_id);
956 } 969 }
957 970
958 void CloseDisplay(Kernel::HLERequestContext& ctx) { 971 void CloseDisplay(Kernel::HLERequestContext& ctx) {
@@ -1043,10 +1056,21 @@ private:
1043 1056
1044 LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}, aruid=0x{:016X}", layer_id, aruid); 1057 LOG_DEBUG(Service_VI, "called. layer_id=0x{:016X}, aruid=0x{:016X}", layer_id, aruid);
1045 1058
1046 const u64 display_id = nv_flinger->OpenDisplay(display_name); 1059 const auto display_id = nv_flinger->OpenDisplay(display_name);
1047 const u32 buffer_queue_id = nv_flinger->FindBufferQueueId(display_id, layer_id); 1060 if (!display_id) {
1061 IPC::ResponseBuilder rb{ctx, 2};
1062 rb.Push(ERR_NOT_FOUND);
1063 return;
1064 }
1065
1066 const auto buffer_queue_id = nv_flinger->FindBufferQueueId(*display_id, layer_id);
1067 if (!buffer_queue_id) {
1068 IPC::ResponseBuilder rb{ctx, 2};
1069 rb.Push(ERR_NOT_FOUND);
1070 return;
1071 }
1048 1072
1049 NativeWindow native_window{buffer_queue_id}; 1073 NativeWindow native_window{*buffer_queue_id};
1050 IPC::ResponseBuilder rb{ctx, 4}; 1074 IPC::ResponseBuilder rb{ctx, 4};
1051 rb.Push(RESULT_SUCCESS); 1075 rb.Push(RESULT_SUCCESS);
1052 rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize())); 1076 rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
@@ -1062,13 +1086,24 @@ private:
1062 1086
1063 // TODO(Subv): What's the difference between a Stray and a Managed layer? 1087 // TODO(Subv): What's the difference between a Stray and a Managed layer?
1064 1088
1065 const u64 layer_id = nv_flinger->CreateLayer(display_id); 1089 const auto layer_id = nv_flinger->CreateLayer(display_id);
1066 const u32 buffer_queue_id = nv_flinger->FindBufferQueueId(display_id, layer_id); 1090 if (!layer_id) {
1091 IPC::ResponseBuilder rb{ctx, 2};
1092 rb.Push(ERR_NOT_FOUND);
1093 return;
1094 }
1095
1096 const auto buffer_queue_id = nv_flinger->FindBufferQueueId(display_id, *layer_id);
1097 if (!buffer_queue_id) {
1098 IPC::ResponseBuilder rb{ctx, 2};
1099 rb.Push(ERR_NOT_FOUND);
1100 return;
1101 }
1067 1102
1068 NativeWindow native_window{buffer_queue_id}; 1103 NativeWindow native_window{*buffer_queue_id};
1069 IPC::ResponseBuilder rb{ctx, 6}; 1104 IPC::ResponseBuilder rb{ctx, 6};
1070 rb.Push(RESULT_SUCCESS); 1105 rb.Push(RESULT_SUCCESS);
1071 rb.Push(layer_id); 1106 rb.Push(*layer_id);
1072 rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize())); 1107 rb.Push<u64>(ctx.WriteBuffer(native_window.Serialize()));
1073 } 1108 }
1074 1109
@@ -1088,7 +1123,12 @@ private:
1088 1123
1089 LOG_WARNING(Service_VI, "(STUBBED) called. display_id=0x{:016X}", display_id); 1124 LOG_WARNING(Service_VI, "(STUBBED) called. display_id=0x{:016X}", display_id);
1090 1125
1091 const auto vsync_event = nv_flinger->GetVsyncEvent(display_id); 1126 const auto vsync_event = nv_flinger->FindVsyncEvent(display_id);
1127 if (!vsync_event) {
1128 IPC::ResponseBuilder rb{ctx, 2};
1129 rb.Push(ERR_NOT_FOUND);
1130 return;
1131 }
1092 1132
1093 IPC::ResponseBuilder rb{ctx, 2, 1}; 1133 IPC::ResponseBuilder rb{ctx, 2, 1};
1094 rb.Push(RESULT_SUCCESS); 1134 rb.Push(RESULT_SUCCESS);
diff --git a/src/core/settings.h b/src/core/settings.h
index c97387fc7..7e76e0466 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -391,6 +391,7 @@ struct Values {
391 float resolution_factor; 391 float resolution_factor;
392 bool use_frame_limit; 392 bool use_frame_limit;
393 u16 frame_limit; 393 u16 frame_limit;
394 bool use_disk_shader_cache;
394 bool use_accurate_gpu_emulation; 395 bool use_accurate_gpu_emulation;
395 396
396 float bg_red; 397 float bg_red;
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 09ed74d78..58dfcc4df 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -158,6 +158,8 @@ TelemetrySession::TelemetrySession() {
158 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseFrameLimit", 158 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseFrameLimit",
159 Settings::values.use_frame_limit); 159 Settings::values.use_frame_limit);
160 AddField(Telemetry::FieldType::UserConfig, "Renderer_FrameLimit", Settings::values.frame_limit); 160 AddField(Telemetry::FieldType::UserConfig, "Renderer_FrameLimit", Settings::values.frame_limit);
161 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseDiskShaderCache",
162 Settings::values.use_disk_shader_cache);
161 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation", 163 AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation",
162 Settings::values.use_accurate_gpu_emulation); 164 Settings::values.use_accurate_gpu_emulation);
163 AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode", 165 AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode",
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6113e17ff..33e507e69 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -44,6 +44,8 @@ add_library(video_core STATIC
44 renderer_opengl/gl_shader_cache.h 44 renderer_opengl/gl_shader_cache.h
45 renderer_opengl/gl_shader_decompiler.cpp 45 renderer_opengl/gl_shader_decompiler.cpp
46 renderer_opengl/gl_shader_decompiler.h 46 renderer_opengl/gl_shader_decompiler.h
47 renderer_opengl/gl_shader_disk_cache.cpp
48 renderer_opengl/gl_shader_disk_cache.h
47 renderer_opengl/gl_shader_gen.cpp 49 renderer_opengl/gl_shader_gen.cpp
48 renderer_opengl/gl_shader_gen.h 50 renderer_opengl/gl_shader_gen.h
49 renderer_opengl/gl_shader_manager.cpp 51 renderer_opengl/gl_shader_manager.cpp
@@ -102,4 +104,4 @@ add_library(video_core STATIC
102create_target_directory_groups(video_core) 104create_target_directory_groups(video_core)
103 105
104target_link_libraries(video_core PUBLIC common core) 106target_link_libraries(video_core PUBLIC common core)
105target_link_libraries(video_core PRIVATE glad) 107target_link_libraries(video_core PRIVATE glad lz4_static)
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 63a958f11..eb9bf1878 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -35,8 +35,10 @@ void DmaPusher::DispatchCalls() {
35bool DmaPusher::Step() { 35bool DmaPusher::Step() {
36 if (dma_get != dma_put) { 36 if (dma_get != dma_put) {
37 // Push buffer non-empty, read a word 37 // Push buffer non-empty, read a word
38 const CommandHeader command_header{ 38 const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
39 Memory::Read32(*gpu.MemoryManager().GpuToCpuAddress(dma_get))}; 39 ASSERT_MSG(address, "Invalid GPU address");
40
41 const CommandHeader command_header{Memory::Read32(*address)};
40 42
41 dma_get += sizeof(u32); 43 dma_get += sizeof(u32);
42 44
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 80f70e332..9f1533263 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -42,8 +42,10 @@ void Fermi2D::HandleSurfaceCopy() {
42 // TODO(Subv): Only raw copies are implemented. 42 // TODO(Subv): Only raw copies are implemented.
43 ASSERT(regs.operation == Regs::Operation::SrcCopy); 43 ASSERT(regs.operation == Regs::Operation::SrcCopy);
44 44
45 const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source); 45 const auto source_cpu = memory_manager.GpuToCpuAddress(source);
46 const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest); 46 const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
47 ASSERT_MSG(source_cpu, "Invalid source GPU address");
48 ASSERT_MSG(dest_cpu, "Invalid destination GPU address");
47 49
48 u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format); 50 u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format);
49 u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format); 51 u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
@@ -52,22 +54,22 @@ void Fermi2D::HandleSurfaceCopy() {
52 // All copies here update the main memory, so mark all rasterizer states as invalid. 54 // All copies here update the main memory, so mark all rasterizer states as invalid.
53 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 55 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
54 56
55 rasterizer.FlushRegion(source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height); 57 rasterizer.FlushRegion(*source_cpu, src_bytes_per_pixel * regs.src.width * regs.src.height);
56 // We have to invalidate the destination region to evict any outdated surfaces from the 58 // We have to invalidate the destination region to evict any outdated surfaces from the
57 // cache. We do this before actually writing the new data because the destination address 59 // cache. We do this before actually writing the new data because the destination address
58 // might contain a dirty surface that will have to be written back to memory. 60 // might contain a dirty surface that will have to be written back to memory.
59 rasterizer.InvalidateRegion(dest_cpu, 61 rasterizer.InvalidateRegion(*dest_cpu,
60 dst_bytes_per_pixel * regs.dst.width * regs.dst.height); 62 dst_bytes_per_pixel * regs.dst.width * regs.dst.height);
61 63
62 if (regs.src.linear == regs.dst.linear) { 64 if (regs.src.linear == regs.dst.linear) {
63 // If the input layout and the output layout are the same, just perform a raw copy. 65 // If the input layout and the output layout are the same, just perform a raw copy.
64 ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight()); 66 ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight());
65 Memory::CopyBlock(dest_cpu, source_cpu, 67 Memory::CopyBlock(*dest_cpu, *source_cpu,
66 src_bytes_per_pixel * regs.dst.width * regs.dst.height); 68 src_bytes_per_pixel * regs.dst.width * regs.dst.height);
67 return; 69 return;
68 } 70 }
69 u8* src_buffer = Memory::GetPointer(source_cpu); 71 u8* src_buffer = Memory::GetPointer(*source_cpu);
70 u8* dst_buffer = Memory::GetPointer(dest_cpu); 72 u8* dst_buffer = Memory::GetPointer(*dest_cpu);
71 if (!regs.src.linear && regs.dst.linear) { 73 if (!regs.src.linear && regs.dst.linear) {
72 // If the input is tiled and the output is linear, deswizzle the input and copy it over. 74 // If the input is tiled and the output is linear, deswizzle the input and copy it over.
73 Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth, 75 Texture::CopySwizzledData(regs.src.width, regs.src.height, regs.src.depth,
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 4880191fc..5c1029ddf 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -39,16 +39,17 @@ void KeplerMemory::ProcessData(u32 data) {
39 ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); 39 ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
40 ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); 40 ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
41 41
42 GPUVAddr address = regs.dest.Address(); 42 const GPUVAddr address = regs.dest.Address();
43 VAddr dest_address = 43 const auto dest_address =
44 *memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32)); 44 memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
45 ASSERT_MSG(dest_address, "Invalid GPU address");
45 46
46 // We have to invalidate the destination region to evict any outdated surfaces from the cache. 47 // We have to invalidate the destination region to evict any outdated surfaces from the cache.
47 // We do this before actually writing the new data because the destination address might contain 48 // We do this before actually writing the new data because the destination address might contain
48 // a dirty surface that will have to be written back to memory. 49 // a dirty surface that will have to be written back to memory.
49 rasterizer.InvalidateRegion(dest_address, sizeof(u32)); 50 rasterizer.InvalidateRegion(*dest_address, sizeof(u32));
50 51
51 Memory::Write32(dest_address, data); 52 Memory::Write32(*dest_address, data);
52 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 53 Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
53 54
54 state.write_offset++; 55 state.write_offset++;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index a388b3944..10eae6a65 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -273,7 +273,8 @@ void Maxwell3D::ProcessQueryGet() {
273 GPUVAddr sequence_address = regs.query.QueryAddress(); 273 GPUVAddr sequence_address = regs.query.QueryAddress();
274 // Since the sequence address is given as a GPU VAddr, we have to convert it to an application 274 // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
275 // VAddr before writing. 275 // VAddr before writing.
276 std::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address); 276 const auto address = memory_manager.GpuToCpuAddress(sequence_address);
277 ASSERT_MSG(address, "Invalid GPU address");
277 278
278 // TODO(Subv): Support the other query units. 279 // TODO(Subv): Support the other query units.
279 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, 280 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -386,14 +387,14 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
386 387
387void Maxwell3D::ProcessCBData(u32 value) { 388void Maxwell3D::ProcessCBData(u32 value) {
388 // Write the input value to the current const buffer at the current position. 389 // Write the input value to the current const buffer at the current position.
389 GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); 390 const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
390 ASSERT(buffer_address != 0); 391 ASSERT(buffer_address != 0);
391 392
392 // Don't allow writing past the end of the buffer. 393 // Don't allow writing past the end of the buffer.
393 ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); 394 ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
394 395
395 std::optional<VAddr> address = 396 const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
396 memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); 397 ASSERT_MSG(address, "Invalid GPU address");
397 398
398 Memory::Write32(*address, value); 399 Memory::Write32(*address, value);
399 dirty_flags.OnMemoryWrite(); 400 dirty_flags.OnMemoryWrite();
@@ -403,10 +404,11 @@ void Maxwell3D::ProcessCBData(u32 value) {
403} 404}
404 405
405Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { 406Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
406 GPUVAddr tic_base_address = regs.tic.TICAddress(); 407 const GPUVAddr tic_base_address = regs.tic.TICAddress();
407 408
408 GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry); 409 const GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
409 std::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu); 410 const auto tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
411 ASSERT_MSG(tic_address_cpu, "Invalid GPU address");
410 412
411 Texture::TICEntry tic_entry; 413 Texture::TICEntry tic_entry;
412 Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); 414 Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
@@ -415,10 +417,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
415 tic_entry.header_version == Texture::TICHeaderVersion::Pitch, 417 tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
416 "TIC versions other than BlockLinear or Pitch are unimplemented"); 418 "TIC versions other than BlockLinear or Pitch are unimplemented");
417 419
418 auto r_type = tic_entry.r_type.Value(); 420 const auto r_type = tic_entry.r_type.Value();
419 auto g_type = tic_entry.g_type.Value(); 421 const auto g_type = tic_entry.g_type.Value();
420 auto b_type = tic_entry.b_type.Value(); 422 const auto b_type = tic_entry.b_type.Value();
421 auto a_type = tic_entry.a_type.Value(); 423 const auto a_type = tic_entry.a_type.Value();
422 424
423 // TODO(Subv): Different data types for separate components are not supported 425 // TODO(Subv): Different data types for separate components are not supported
424 ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); 426 ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
@@ -427,10 +429,11 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
427} 429}
428 430
429Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { 431Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
430 GPUVAddr tsc_base_address = regs.tsc.TSCAddress(); 432 const GPUVAddr tsc_base_address = regs.tsc.TSCAddress();
431 433
432 GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry); 434 const GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
433 std::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu); 435 const auto tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
436 ASSERT_MSG(tsc_address_cpu, "Invalid GPU address");
434 437
435 Texture::TSCEntry tsc_entry; 438 Texture::TSCEntry tsc_entry;
436 Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry)); 439 Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
@@ -452,8 +455,10 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
452 for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; 455 for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
453 current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { 456 current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
454 457
455 Texture::TextureHandle tex_handle{ 458 const auto address = memory_manager.GpuToCpuAddress(current_texture);
456 Memory::Read32(*memory_manager.GpuToCpuAddress(current_texture))}; 459 ASSERT_MSG(address, "Invalid GPU address");
460
461 const Texture::TextureHandle tex_handle{Memory::Read32(*address)};
457 462
458 Texture::FullTextureInfo tex_info{}; 463 Texture::FullTextureInfo tex_info{};
459 // TODO(Subv): Use the shader to determine which textures are actually accessed. 464 // TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -462,23 +467,16 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
462 sizeof(Texture::TextureHandle); 467 sizeof(Texture::TextureHandle);
463 468
464 // Load the TIC data. 469 // Load the TIC data.
465 if (tex_handle.tic_id != 0) { 470 auto tic_entry = GetTICEntry(tex_handle.tic_id);
466 tex_info.enabled = true; 471 // TODO(Subv): Workaround for BitField's move constructor being deleted.
467 472 std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
468 auto tic_entry = GetTICEntry(tex_handle.tic_id);
469 // TODO(Subv): Workaround for BitField's move constructor being deleted.
470 std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
471 }
472 473
473 // Load the TSC data 474 // Load the TSC data
474 if (tex_handle.tsc_id != 0) { 475 auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
475 auto tsc_entry = GetTSCEntry(tex_handle.tsc_id); 476 // TODO(Subv): Workaround for BitField's move constructor being deleted.
476 // TODO(Subv): Workaround for BitField's move constructor being deleted. 477 std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
477 std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
478 }
479 478
480 if (tex_info.enabled) 479 textures.push_back(tex_info);
481 textures.push_back(tex_info);
482 } 480 }
483 481
484 return textures; 482 return textures;
@@ -490,31 +488,28 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
490 auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index]; 488 auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
491 ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0); 489 ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
492 490
493 GPUVAddr tex_info_address = tex_info_buffer.address + offset * sizeof(Texture::TextureHandle); 491 const GPUVAddr tex_info_address =
492 tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
494 493
495 ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); 494 ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
496 495
497 std::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address); 496 const auto tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
498 Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)}; 497 ASSERT_MSG(tex_address_cpu, "Invalid GPU address");
498
499 const Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
499 500
500 Texture::FullTextureInfo tex_info{}; 501 Texture::FullTextureInfo tex_info{};
501 tex_info.index = static_cast<u32>(offset); 502 tex_info.index = static_cast<u32>(offset);
502 503
503 // Load the TIC data. 504 // Load the TIC data.
504 if (tex_handle.tic_id != 0) { 505 auto tic_entry = GetTICEntry(tex_handle.tic_id);
505 tex_info.enabled = true; 506 // TODO(Subv): Workaround for BitField's move constructor being deleted.
506 507 std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
507 auto tic_entry = GetTICEntry(tex_handle.tic_id);
508 // TODO(Subv): Workaround for BitField's move constructor being deleted.
509 std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
510 }
511 508
512 // Load the TSC data 509 // Load the TSC data
513 if (tex_handle.tsc_id != 0) { 510 auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
514 auto tsc_entry = GetTSCEntry(tex_handle.tsc_id); 511 // TODO(Subv): Workaround for BitField's move constructor being deleted.
515 // TODO(Subv): Workaround for BitField's move constructor being deleted. 512 std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
516 std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
517 }
518 513
519 return tex_info; 514 return tex_info;
520} 515}
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 06462f570..d6c41a5ae 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -39,8 +39,10 @@ void MaxwellDMA::HandleCopy() {
39 const GPUVAddr source = regs.src_address.Address(); 39 const GPUVAddr source = regs.src_address.Address();
40 const GPUVAddr dest = regs.dst_address.Address(); 40 const GPUVAddr dest = regs.dst_address.Address();
41 41
42 const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source); 42 const auto source_cpu = memory_manager.GpuToCpuAddress(source);
43 const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest); 43 const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
44 ASSERT_MSG(source_cpu, "Invalid source GPU address");
45 ASSERT_MSG(dest_cpu, "Invalid destination GPU address");
44 46
45 // TODO(Subv): Perform more research and implement all features of this engine. 47 // TODO(Subv): Perform more research and implement all features of this engine.
46 ASSERT(regs.exec.enable_swizzle == 0); 48 ASSERT(regs.exec.enable_swizzle == 0);
@@ -64,7 +66,7 @@ void MaxwellDMA::HandleCopy() {
64 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, 66 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
65 // y_count). 67 // y_count).
66 if (!regs.exec.enable_2d) { 68 if (!regs.exec.enable_2d) {
67 Memory::CopyBlock(dest_cpu, source_cpu, regs.x_count); 69 Memory::CopyBlock(*dest_cpu, *source_cpu, regs.x_count);
68 return; 70 return;
69 } 71 }
70 72
@@ -73,8 +75,8 @@ void MaxwellDMA::HandleCopy() {
73 // rectangle. There is no need to manually flush/invalidate the regions because 75 // rectangle. There is no need to manually flush/invalidate the regions because
74 // CopyBlock does that for us. 76 // CopyBlock does that for us.
75 for (u32 line = 0; line < regs.y_count; ++line) { 77 for (u32 line = 0; line < regs.y_count; ++line) {
76 const VAddr source_line = source_cpu + line * regs.src_pitch; 78 const VAddr source_line = *source_cpu + line * regs.src_pitch;
77 const VAddr dest_line = dest_cpu + line * regs.dst_pitch; 79 const VAddr dest_line = *dest_cpu + line * regs.dst_pitch;
78 Memory::CopyBlock(dest_line, source_line, regs.x_count); 80 Memory::CopyBlock(dest_line, source_line, regs.x_count);
79 } 81 }
80 return; 82 return;
@@ -87,12 +89,12 @@ void MaxwellDMA::HandleCopy() {
87 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { 89 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
88 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated 90 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
89 // copying. 91 // copying.
90 rasterizer.FlushRegion(source_cpu, src_size); 92 rasterizer.FlushRegion(*source_cpu, src_size);
91 93
92 // We have to invalidate the destination region to evict any outdated surfaces from the 94 // We have to invalidate the destination region to evict any outdated surfaces from the
93 // cache. We do this before actually writing the new data because the destination address 95 // cache. We do this before actually writing the new data because the destination address
94 // might contain a dirty surface that will have to be written back to memory. 96 // might contain a dirty surface that will have to be written back to memory.
95 rasterizer.InvalidateRegion(dest_cpu, dst_size); 97 rasterizer.InvalidateRegion(*dest_cpu, dst_size);
96 }; 98 };
97 99
98 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 100 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
@@ -105,8 +107,8 @@ void MaxwellDMA::HandleCopy() {
105 copy_size * src_bytes_per_pixel); 107 copy_size * src_bytes_per_pixel);
106 108
107 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, 109 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
108 regs.src_params.size_x, src_bytes_per_pixel, source_cpu, dest_cpu, 110 regs.src_params.size_x, src_bytes_per_pixel, *source_cpu,
109 regs.src_params.BlockHeight(), regs.src_params.pos_x, 111 *dest_cpu, regs.src_params.BlockHeight(), regs.src_params.pos_x,
110 regs.src_params.pos_y); 112 regs.src_params.pos_y);
111 } else { 113 } else {
112 ASSERT(regs.dst_params.size_z == 1); 114 ASSERT(regs.dst_params.size_z == 1);
@@ -119,7 +121,7 @@ void MaxwellDMA::HandleCopy() {
119 121
120 // If the input is linear and the output is tiled, swizzle the input and copy it over. 122 // If the input is linear and the output is tiled, swizzle the input and copy it over.
121 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, 123 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
122 src_bpp, dest_cpu, source_cpu, regs.dst_params.BlockHeight()); 124 src_bpp, *dest_cpu, *source_cpu, regs.dst_params.BlockHeight());
123 } 125 }
124} 126}
125 127
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 713b01c9f..269df9437 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -217,9 +217,9 @@ enum class StoreType : u64 {
217 Signed8 = 1, 217 Signed8 = 1,
218 Unsigned16 = 2, 218 Unsigned16 = 2,
219 Signed16 = 3, 219 Signed16 = 3,
220 Bytes32 = 4, 220 Bits32 = 4,
221 Bytes64 = 5, 221 Bits64 = 5,
222 Bytes128 = 6, 222 Bits128 = 6,
223}; 223};
224 224
225enum class IMinMaxExchange : u64 { 225enum class IMinMaxExchange : u64 {
@@ -981,6 +981,10 @@ union Instruction {
981 } 981 }
982 return false; 982 return false;
983 } 983 }
984
985 bool IsComponentEnabled(std::size_t component) const {
986 return ((1ULL << component) & component_mask) != 0;
987 }
984 } txq; 988 } txq;
985 989
986 union { 990 union {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 47247f097..54abe5298 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -154,7 +154,8 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
154 const VAddr base_addr{PageSlot(gpu_addr)}; 154 const VAddr base_addr{PageSlot(gpu_addr)};
155 155
156 if (base_addr == static_cast<u64>(PageStatus::Allocated) || 156 if (base_addr == static_cast<u64>(PageStatus::Allocated) ||
157 base_addr == static_cast<u64>(PageStatus::Unmapped)) { 157 base_addr == static_cast<u64>(PageStatus::Unmapped) ||
158 base_addr == static_cast<u64>(PageStatus::Reserved)) {
158 return {}; 159 return {};
159 } 160 }
160 161
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 4c08bb148..77da135a0 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <atomic>
7#include <functional> 8#include <functional>
8#include "common/common_types.h" 9#include "common/common_types.h"
9#include "video_core/engines/fermi_2d.h" 10#include "video_core/engines/fermi_2d.h"
@@ -61,5 +62,9 @@ public:
61 62
62 /// Increase/decrease the number of object in pages touching the specified region 63 /// Increase/decrease the number of object in pages touching the specified region
63 virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {} 64 virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {}
65
66 /// Initialize disk cached resources for the game being emulated
67 virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
68 const DiskResourceLoadCallback& callback = {}) {}
64}; 69};
65} // namespace VideoCore 70} // namespace VideoCore
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index bd2b30e77..b3062e5ba 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -19,7 +19,8 @@ OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
19GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, 19GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
20 std::size_t alignment, bool cache) { 20 std::size_t alignment, bool cache) {
21 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); 21 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
22 const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; 22 const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
23 ASSERT_MSG(cpu_addr, "Invalid GPU address");
23 24
24 // Cache management is a big overhead, so only cache entries with a given size. 25 // Cache management is a big overhead, so only cache entries with a given size.
25 // TODO: Figure out which size is the best for given games. 26 // TODO: Figure out which size is the best for given games.
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
index d9ed08437..77d5cedd2 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -46,7 +46,9 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
46 auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size); 46 auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
47 47
48 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); 48 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
49 const std::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; 49 const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
50 ASSERT_MSG(cpu_addr, "Invalid GPU address");
51
50 const u8* source{Memory::GetPointer(*cpu_addr)}; 52 const u8* source{Memory::GetPointer(*cpu_addr)};
51 53
52 for (u32 primitive = 0; primitive < count / 4; ++primitive) { 54 for (u32 primitive = 0; primitive < count / 4; ++primitive) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 9f7c837d6..974ca6a20 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -22,6 +22,7 @@
22#include "core/settings.h" 22#include "core/settings.h"
23#include "video_core/engines/maxwell_3d.h" 23#include "video_core/engines/maxwell_3d.h"
24#include "video_core/renderer_opengl/gl_rasterizer.h" 24#include "video_core/renderer_opengl/gl_rasterizer.h"
25#include "video_core/renderer_opengl/gl_shader_cache.h"
25#include "video_core/renderer_opengl/gl_shader_gen.h" 26#include "video_core/renderer_opengl/gl_shader_gen.h"
26#include "video_core/renderer_opengl/maxwell_to_gl.h" 27#include "video_core/renderer_opengl/maxwell_to_gl.h"
27#include "video_core/renderer_opengl/renderer_opengl.h" 28#include "video_core/renderer_opengl/renderer_opengl.h"
@@ -99,8 +100,9 @@ struct FramebufferCacheKey {
99 } 100 }
100}; 101};
101 102
102RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info) 103RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
103 : res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info}, 104 ScreenInfo& info)
105 : res_cache{*this}, shader_cache{*this, system}, emu_window{window}, screen_info{info},
104 buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} { 106 buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
105 // Create sampler objects 107 // Create sampler objects
106 for (std::size_t i = 0; i < texture_samplers.size(); ++i) { 108 for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
@@ -447,7 +449,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
447 return boost::make_iterator_range(map.equal_range(interval)); 449 return boost::make_iterator_range(map.equal_range(interval));
448} 450}
449 451
450void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { 452void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
451 const u64 page_start{addr >> Memory::PAGE_BITS}; 453 const u64 page_start{addr >> Memory::PAGE_BITS};
452 const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; 454 const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
453 455
@@ -477,6 +479,11 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
477 cached_pages.add({pages_interval, delta}); 479 cached_pages.add({pages_interval, delta});
478} 480}
479 481
482void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
483 const VideoCore::DiskResourceLoadCallback& callback) {
484 shader_cache.LoadDiskCache(stop_loading, callback);
485}
486
480std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( 487std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
481 OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, 488 OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
482 std::optional<std::size_t> single_color_target) { 489 std::optional<std::size_t> single_color_target) {
@@ -1004,29 +1011,20 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
1004 1011
1005 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { 1012 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
1006 const auto& entry = entries[bindpoint]; 1013 const auto& entry = entries[bindpoint];
1014 const auto texture = maxwell3d.GetStageTexture(stage, entry.GetOffset());
1007 const u32 current_bindpoint = base_bindings.sampler + bindpoint; 1015 const u32 current_bindpoint = base_bindings.sampler + bindpoint;
1008 auto& unit = state.texture_units[current_bindpoint];
1009
1010 const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
1011 if (!texture.enabled) {
1012 unit.texture = 0;
1013 continue;
1014 }
1015 1016
1016 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); 1017 texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
1017 1018
1018 Surface surface = res_cache.GetTextureSurface(texture, entry); 1019 Surface surface = res_cache.GetTextureSurface(texture, entry);
1019 if (surface != nullptr) { 1020 if (surface != nullptr) {
1020 unit.texture = 1021 state.texture_units[current_bindpoint].texture =
1021 entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; 1022 entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle;
1022 unit.target = entry.IsArray() ? surface->TargetLayer() : surface->Target(); 1023 surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
1023 unit.swizzle.r = MaxwellToGL::SwizzleSource(texture.tic.x_source); 1024 texture.tic.w_source);
1024 unit.swizzle.g = MaxwellToGL::SwizzleSource(texture.tic.y_source);
1025 unit.swizzle.b = MaxwellToGL::SwizzleSource(texture.tic.z_source);
1026 unit.swizzle.a = MaxwellToGL::SwizzleSource(texture.tic.w_source);
1027 } else { 1025 } else {
1028 // Can occur when texture addr is null or its memory is unmapped/invalid 1026 // Can occur when texture addr is null or its memory is unmapped/invalid
1029 unit.texture = 0; 1027 state.texture_units[current_bindpoint].texture = 0;
1030 } 1028 }
1031 } 1029 }
1032} 1030}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 7f2bf0f8b..f3b607f4d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <atomic>
8#include <cstddef> 9#include <cstddef>
9#include <map> 10#include <map>
10#include <memory> 11#include <memory>
@@ -33,6 +34,10 @@
33#include "video_core/renderer_opengl/gl_state.h" 34#include "video_core/renderer_opengl/gl_state.h"
34#include "video_core/renderer_opengl/gl_stream_buffer.h" 35#include "video_core/renderer_opengl/gl_stream_buffer.h"
35 36
37namespace Core {
38class System;
39}
40
36namespace Core::Frontend { 41namespace Core::Frontend {
37class EmuWindow; 42class EmuWindow;
38} 43}
@@ -45,7 +50,8 @@ struct FramebufferCacheKey;
45 50
46class RasterizerOpenGL : public VideoCore::RasterizerInterface { 51class RasterizerOpenGL : public VideoCore::RasterizerInterface {
47public: 52public:
48 explicit RasterizerOpenGL(Core::Frontend::EmuWindow& renderer, ScreenInfo& info); 53 explicit RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system,
54 ScreenInfo& info);
49 ~RasterizerOpenGL() override; 55 ~RasterizerOpenGL() override;
50 56
51 void DrawArrays() override; 57 void DrawArrays() override;
@@ -60,6 +66,8 @@ public:
60 u32 pixel_stride) override; 66 u32 pixel_stride) override;
61 bool AccelerateDrawBatch(bool is_indexed) override; 67 bool AccelerateDrawBatch(bool is_indexed) override;
62 void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override; 68 void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override;
69 void LoadDiskResources(const std::atomic_bool& stop_loading,
70 const VideoCore::DiskResourceLoadCallback& callback) override;
63 71
64 /// Maximum supported size that a constbuffer can have in bytes. 72 /// Maximum supported size that a constbuffer can have in bytes.
65 static constexpr std::size_t MaxConstbufferSize = 0x10000; 73 static constexpr std::size_t MaxConstbufferSize = 0x10000;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 50286432d..a79eee03e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -18,7 +18,6 @@
18#include "video_core/morton.h" 18#include "video_core/morton.h"
19#include "video_core/renderer_opengl/gl_rasterizer.h" 19#include "video_core/renderer_opengl/gl_rasterizer.h"
20#include "video_core/renderer_opengl/gl_rasterizer_cache.h" 20#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
21#include "video_core/renderer_opengl/gl_state.h"
22#include "video_core/renderer_opengl/utils.h" 21#include "video_core/renderer_opengl/utils.h"
23#include "video_core/surface.h" 22#include "video_core/surface.h"
24#include "video_core/textures/astc.h" 23#include "video_core/textures/astc.h"
@@ -44,14 +43,14 @@ struct FormatTuple {
44 bool compressed; 43 bool compressed;
45}; 44};
46 45
47static void ApplyTextureDefaults(GLenum target, u32 max_mip_level) { 46static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
48 glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 47 glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
49 glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 48 glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
50 glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 49 glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
51 glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); 50 glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
52 glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1); 51 glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1);
53 if (max_mip_level == 1) { 52 if (max_mip_level == 1) {
54 glTexParameterf(target, GL_TEXTURE_LOD_BIAS, 1000.0); 53 glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0);
55 } 54 }
56} 55}
57 56
@@ -529,55 +528,41 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
529CachedSurface::CachedSurface(const SurfaceParams& params) 528CachedSurface::CachedSurface(const SurfaceParams& params)
530 : params(params), gl_target(SurfaceTargetToGL(params.target)), 529 : params(params), gl_target(SurfaceTargetToGL(params.target)),
531 cached_size_in_bytes(params.size_in_bytes) { 530 cached_size_in_bytes(params.size_in_bytes) {
532 texture.Create(); 531 texture.Create(gl_target);
533 const auto& rect{params.GetRect()}; 532
534 533 // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
535 // Keep track of previous texture bindings 534 // alternatives. This signals a bug on those functions.
536 OpenGLState cur_state = OpenGLState::GetCurState(); 535 const auto width = static_cast<GLsizei>(params.MipWidth(0));
537 const auto& old_tex = cur_state.texture_units[0]; 536 const auto height = static_cast<GLsizei>(params.MipHeight(0));
538 SCOPE_EXIT({
539 cur_state.texture_units[0] = old_tex;
540 cur_state.Apply();
541 });
542
543 cur_state.texture_units[0].texture = texture.handle;
544 cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
545 cur_state.Apply();
546 glActiveTexture(GL_TEXTURE0);
547 537
548 const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type); 538 const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
549 gl_internal_format = format_tuple.internal_format; 539 gl_internal_format = format_tuple.internal_format;
550 gl_is_compressed = format_tuple.compressed;
551 540
552 if (!format_tuple.compressed) { 541 switch (params.target) {
553 // Only pre-create the texture for non-compressed textures. 542 case SurfaceTarget::Texture1D:
554 switch (params.target) { 543 glTextureStorage1D(texture.handle, params.max_mip_level, format_tuple.internal_format,
555 case SurfaceTarget::Texture1D: 544 width);
556 glTexStorage1D(SurfaceTargetToGL(params.target), params.max_mip_level, 545 break;
557 format_tuple.internal_format, rect.GetWidth()); 546 case SurfaceTarget::Texture2D:
558 break; 547 case SurfaceTarget::TextureCubemap:
559 case SurfaceTarget::Texture2D: 548 glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
560 case SurfaceTarget::TextureCubemap: 549 width, height);
561 glTexStorage2D(SurfaceTargetToGL(params.target), params.max_mip_level, 550 break;
562 format_tuple.internal_format, rect.GetWidth(), rect.GetHeight()); 551 case SurfaceTarget::Texture3D:
563 break; 552 case SurfaceTarget::Texture2DArray:
564 case SurfaceTarget::Texture3D: 553 case SurfaceTarget::TextureCubeArray:
565 case SurfaceTarget::Texture2DArray: 554 glTextureStorage3D(texture.handle, params.max_mip_level, format_tuple.internal_format,
566 case SurfaceTarget::TextureCubeArray: 555 width, height, params.depth);
567 glTexStorage3D(SurfaceTargetToGL(params.target), params.max_mip_level, 556 break;
568 format_tuple.internal_format, rect.GetWidth(), rect.GetHeight(), 557 default:
569 params.depth); 558 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
570 break; 559 static_cast<u32>(params.target));
571 default: 560 UNREACHABLE();
572 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", 561 glTextureStorage2D(texture.handle, params.max_mip_level, format_tuple.internal_format,
573 static_cast<u32>(params.target)); 562 width, height);
574 UNREACHABLE();
575 glTexStorage2D(GL_TEXTURE_2D, params.max_mip_level, format_tuple.internal_format,
576 rect.GetWidth(), rect.GetHeight());
577 }
578 } 563 }
579 564
580 ApplyTextureDefaults(SurfaceTargetToGL(params.target), params.max_mip_level); 565 ApplyTextureDefaults(texture.handle, params.max_mip_level);
581 566
582 OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString()); 567 OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString());
583 568
@@ -751,63 +736,50 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
751 const auto& rect{params.GetRect(mip_map)}; 736 const auto& rect{params.GetRect(mip_map)};
752 737
753 // Load data from memory to the surface 738 // Load data from memory to the surface
754 const GLint x0 = static_cast<GLint>(rect.left); 739 const auto x0 = static_cast<GLint>(rect.left);
755 const GLint y0 = static_cast<GLint>(rect.bottom); 740 const auto y0 = static_cast<GLint>(rect.bottom);
756 std::size_t buffer_offset = 741 auto buffer_offset =
757 static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) + 742 static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) +
758 static_cast<std::size_t>(x0)) * 743 static_cast<std::size_t>(x0)) *
759 GetBytesPerPixel(params.pixel_format); 744 GetBytesPerPixel(params.pixel_format);
760 745
761 const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); 746 const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
762 const GLuint target_tex = texture.handle;
763 OpenGLState cur_state = OpenGLState::GetCurState();
764
765 const auto& old_tex = cur_state.texture_units[0];
766 SCOPE_EXIT({
767 cur_state.texture_units[0] = old_tex;
768 cur_state.Apply();
769 });
770 cur_state.texture_units[0].texture = target_tex;
771 cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
772 cur_state.Apply();
773 747
774 // Ensure no bad interactions with GL_UNPACK_ALIGNMENT 748 // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
775 ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0); 749 ASSERT(params.MipWidth(mip_map) * GetBytesPerPixel(params.pixel_format) % 4 == 0);
776 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map))); 750 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map)));
777 751
778 GLsizei image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false)); 752 const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false));
779 glActiveTexture(GL_TEXTURE0);
780 if (tuple.compressed) { 753 if (tuple.compressed) {
781 switch (params.target) { 754 switch (params.target) {
782 case SurfaceTarget::Texture2D: 755 case SurfaceTarget::Texture2D:
783 glCompressedTexImage2D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format, 756 glCompressedTextureSubImage2D(
784 static_cast<GLsizei>(params.MipWidth(mip_map)), 757 texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
785 static_cast<GLsizei>(params.MipHeight(mip_map)), 0, image_size, 758 static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format, image_size,
786 &gl_buffer[mip_map][buffer_offset]); 759 &gl_buffer[mip_map][buffer_offset]);
787 break; 760 break;
788 case SurfaceTarget::Texture3D: 761 case SurfaceTarget::Texture3D:
789 glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format, 762 glCompressedTextureSubImage3D(
790 static_cast<GLsizei>(params.MipWidth(mip_map)), 763 texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
791 static_cast<GLsizei>(params.MipHeight(mip_map)), 764 static_cast<GLsizei>(params.MipHeight(mip_map)),
792 static_cast<GLsizei>(params.MipDepth(mip_map)), 0, image_size, 765 static_cast<GLsizei>(params.MipDepth(mip_map)), tuple.internal_format, image_size,
793 &gl_buffer[mip_map][buffer_offset]); 766 &gl_buffer[mip_map][buffer_offset]);
794 break; 767 break;
795 case SurfaceTarget::Texture2DArray: 768 case SurfaceTarget::Texture2DArray:
796 case SurfaceTarget::TextureCubeArray: 769 case SurfaceTarget::TextureCubeArray:
797 glCompressedTexImage3D(SurfaceTargetToGL(params.target), mip_map, tuple.internal_format, 770 glCompressedTextureSubImage3D(
798 static_cast<GLsizei>(params.MipWidth(mip_map)), 771 texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
799 static_cast<GLsizei>(params.MipHeight(mip_map)), 772 static_cast<GLsizei>(params.MipHeight(mip_map)), static_cast<GLsizei>(params.depth),
800 static_cast<GLsizei>(params.depth), 0, image_size, 773 tuple.internal_format, image_size, &gl_buffer[mip_map][buffer_offset]);
801 &gl_buffer[mip_map][buffer_offset]);
802 break; 774 break;
803 case SurfaceTarget::TextureCubemap: { 775 case SurfaceTarget::TextureCubemap: {
804 GLsizei layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map)); 776 const auto layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map));
805 for (std::size_t face = 0; face < params.depth; ++face) { 777 for (std::size_t face = 0; face < params.depth; ++face) {
806 glCompressedTexImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), 778 glCompressedTextureSubImage3D(
807 mip_map, tuple.internal_format, 779 texture.handle, mip_map, 0, 0, static_cast<GLint>(face),
808 static_cast<GLsizei>(params.MipWidth(mip_map)), 780 static_cast<GLsizei>(params.MipWidth(mip_map)),
809 static_cast<GLsizei>(params.MipHeight(mip_map)), 0, 781 static_cast<GLsizei>(params.MipHeight(mip_map)), 1, tuple.internal_format,
810 layer_size, &gl_buffer[mip_map][buffer_offset]); 782 layer_size, &gl_buffer[mip_map][buffer_offset]);
811 buffer_offset += layer_size; 783 buffer_offset += layer_size;
812 } 784 }
813 break; 785 break;
@@ -816,46 +788,43 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
816 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", 788 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
817 static_cast<u32>(params.target)); 789 static_cast<u32>(params.target));
818 UNREACHABLE(); 790 UNREACHABLE();
819 glCompressedTexImage2D(GL_TEXTURE_2D, mip_map, tuple.internal_format, 791 glCompressedTextureSubImage2D(
820 static_cast<GLsizei>(params.MipWidth(mip_map)), 792 texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)),
821 static_cast<GLsizei>(params.MipHeight(mip_map)), 0, 793 static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format,
822 static_cast<GLsizei>(params.size_in_bytes_gl), 794 static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[mip_map][buffer_offset]);
823 &gl_buffer[mip_map][buffer_offset]);
824 } 795 }
825 } else { 796 } else {
826
827 switch (params.target) { 797 switch (params.target) {
828 case SurfaceTarget::Texture1D: 798 case SurfaceTarget::Texture1D:
829 glTexSubImage1D(SurfaceTargetToGL(params.target), mip_map, x0, 799 glTextureSubImage1D(texture.handle, mip_map, x0, static_cast<GLsizei>(rect.GetWidth()),
830 static_cast<GLsizei>(rect.GetWidth()), tuple.format, tuple.type, 800 tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
831 &gl_buffer[mip_map][buffer_offset]);
832 break; 801 break;
833 case SurfaceTarget::Texture2D: 802 case SurfaceTarget::Texture2D:
834 glTexSubImage2D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 803 glTextureSubImage2D(texture.handle, mip_map, x0, y0,
835 static_cast<GLsizei>(rect.GetWidth()), 804 static_cast<GLsizei>(rect.GetWidth()),
836 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, 805 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
837 &gl_buffer[mip_map][buffer_offset]); 806 &gl_buffer[mip_map][buffer_offset]);
838 break; 807 break;
839 case SurfaceTarget::Texture3D: 808 case SurfaceTarget::Texture3D:
840 glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0, 809 glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
841 static_cast<GLsizei>(rect.GetWidth()), 810 static_cast<GLsizei>(rect.GetWidth()),
842 static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map), 811 static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map),
843 tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]); 812 tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]);
844 break; 813 break;
845 case SurfaceTarget::Texture2DArray: 814 case SurfaceTarget::Texture2DArray:
846 case SurfaceTarget::TextureCubeArray: 815 case SurfaceTarget::TextureCubeArray:
847 glTexSubImage3D(SurfaceTargetToGL(params.target), mip_map, x0, y0, 0, 816 glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0,
848 static_cast<GLsizei>(rect.GetWidth()), 817 static_cast<GLsizei>(rect.GetWidth()),
849 static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format, 818 static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
850 tuple.type, &gl_buffer[mip_map][buffer_offset]); 819 tuple.type, &gl_buffer[mip_map][buffer_offset]);
851 break; 820 break;
852 case SurfaceTarget::TextureCubemap: { 821 case SurfaceTarget::TextureCubemap: {
853 std::size_t start = buffer_offset; 822 std::size_t start = buffer_offset;
854 for (std::size_t face = 0; face < params.depth; ++face) { 823 for (std::size_t face = 0; face < params.depth; ++face) {
855 glTexSubImage2D(static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face), mip_map, 824 glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face),
856 x0, y0, static_cast<GLsizei>(rect.GetWidth()), 825 static_cast<GLsizei>(rect.GetWidth()),
857 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, 826 static_cast<GLsizei>(rect.GetHeight()), 1, tuple.format,
858 &gl_buffer[mip_map][buffer_offset]); 827 tuple.type, &gl_buffer[mip_map][buffer_offset]);
859 buffer_offset += params.LayerSizeGL(mip_map); 828 buffer_offset += params.LayerSizeGL(mip_map);
860 } 829 }
861 break; 830 break;
@@ -864,9 +833,10 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
864 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", 833 LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
865 static_cast<u32>(params.target)); 834 static_cast<u32>(params.target));
866 UNREACHABLE(); 835 UNREACHABLE();
867 glTexSubImage2D(GL_TEXTURE_2D, mip_map, x0, y0, static_cast<GLsizei>(rect.GetWidth()), 836 glTextureSubImage2D(texture.handle, mip_map, x0, y0,
868 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, 837 static_cast<GLsizei>(rect.GetWidth()),
869 &gl_buffer[mip_map][buffer_offset]); 838 static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
839 &gl_buffer[mip_map][buffer_offset]);
870 } 840 }
871 } 841 }
872 842
@@ -876,29 +846,18 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
876void CachedSurface::EnsureTextureView() { 846void CachedSurface::EnsureTextureView() {
877 if (texture_view.handle != 0) 847 if (texture_view.handle != 0)
878 return; 848 return;
879 // Compressed texture are not being created with immutable storage
880 UNIMPLEMENTED_IF(gl_is_compressed);
881 849
882 const GLenum target{TargetLayer()}; 850 const GLenum target{TargetLayer()};
883 const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; 851 const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u};
884 constexpr GLuint min_layer = 0; 852 constexpr GLuint min_layer = 0;
885 constexpr GLuint min_level = 0; 853 constexpr GLuint min_level = 0;
886 854
887 texture_view.Create(); 855 glGenTextures(1, &texture_view.handle);
888 glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level, 856 glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, 0,
889 params.max_mip_level, min_layer, num_layers); 857 params.max_mip_level, 0, 1);
890 858 ApplyTextureDefaults(texture_view.handle, params.max_mip_level);
891 OpenGLState cur_state = OpenGLState::GetCurState(); 859 glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA,
892 const auto& old_tex = cur_state.texture_units[0]; 860 reinterpret_cast<const GLint*>(swizzle.data()));
893 SCOPE_EXIT({
894 cur_state.texture_units[0] = old_tex;
895 cur_state.Apply();
896 });
897 cur_state.texture_units[0].texture = texture_view.handle;
898 cur_state.texture_units[0].target = target;
899 cur_state.Apply();
900
901 ApplyTextureDefaults(target, params.max_mip_level);
902} 861}
903 862
904MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64)); 863MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
@@ -909,6 +868,25 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
909 UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle); 868 UploadGLMipmapTexture(i, read_fb_handle, draw_fb_handle);
910} 869}
911 870
871void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
872 Tegra::Texture::SwizzleSource swizzle_y,
873 Tegra::Texture::SwizzleSource swizzle_z,
874 Tegra::Texture::SwizzleSource swizzle_w) {
875 const GLenum new_x = MaxwellToGL::SwizzleSource(swizzle_x);
876 const GLenum new_y = MaxwellToGL::SwizzleSource(swizzle_y);
877 const GLenum new_z = MaxwellToGL::SwizzleSource(swizzle_z);
878 const GLenum new_w = MaxwellToGL::SwizzleSource(swizzle_w);
879 if (swizzle[0] == new_x && swizzle[1] == new_y && swizzle[2] == new_z && swizzle[3] == new_w) {
880 return;
881 }
882 swizzle = {new_x, new_y, new_z, new_w};
883 const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data());
884 glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
885 if (texture_view.handle != 0) {
886 glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data);
887 }
888}
889
912RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer) 890RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer)
913 : RasterizerCache{rasterizer} { 891 : RasterizerCache{rasterizer} {
914 read_framebuffer.Create(); 892 read_framebuffer.Create();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 8d7d6722c..490b8252e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -382,6 +382,11 @@ public:
382 // Upload data in gl_buffer to this surface's texture 382 // Upload data in gl_buffer to this surface's texture
383 void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle); 383 void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
384 384
385 void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
386 Tegra::Texture::SwizzleSource swizzle_y,
387 Tegra::Texture::SwizzleSource swizzle_z,
388 Tegra::Texture::SwizzleSource swizzle_w);
389
385private: 390private:
386 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); 391 void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
387 392
@@ -393,8 +398,8 @@ private:
393 SurfaceParams params{}; 398 SurfaceParams params{};
394 GLenum gl_target{}; 399 GLenum gl_target{};
395 GLenum gl_internal_format{}; 400 GLenum gl_internal_format{};
396 bool gl_is_compressed{};
397 std::size_t cached_size_in_bytes{}; 401 std::size_t cached_size_in_bytes{};
402 std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
398}; 403};
399 404
400class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { 405class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 1da744158..bfe666a73 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -15,12 +15,12 @@ MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_R
15 15
16namespace OpenGL { 16namespace OpenGL {
17 17
18void OGLTexture::Create() { 18void OGLTexture::Create(GLenum target) {
19 if (handle != 0) 19 if (handle != 0)
20 return; 20 return;
21 21
22 MICROPROFILE_SCOPE(OpenGL_ResourceCreation); 22 MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
23 glGenTextures(1, &handle); 23 glCreateTextures(target, 1, &handle);
24} 24}
25 25
26void OGLTexture::Release() { 26void OGLTexture::Release() {
@@ -71,7 +71,8 @@ void OGLShader::Release() {
71} 71}
72 72
73void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader, 73void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shader,
74 const char* frag_shader, bool separable_program) { 74 const char* frag_shader, bool separable_program,
75 bool hint_retrievable) {
75 OGLShader vert, geo, frag; 76 OGLShader vert, geo, frag;
76 if (vert_shader) 77 if (vert_shader)
77 vert.Create(vert_shader, GL_VERTEX_SHADER); 78 vert.Create(vert_shader, GL_VERTEX_SHADER);
@@ -81,7 +82,7 @@ void OGLProgram::CreateFromSource(const char* vert_shader, const char* geo_shade
81 frag.Create(frag_shader, GL_FRAGMENT_SHADER); 82 frag.Create(frag_shader, GL_FRAGMENT_SHADER);
82 83
83 MICROPROFILE_SCOPE(OpenGL_ResourceCreation); 84 MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
84 Create(separable_program, vert.handle, geo.handle, frag.handle); 85 Create(separable_program, hint_retrievable, vert.handle, geo.handle, frag.handle);
85} 86}
86 87
87void OGLProgram::Release() { 88void OGLProgram::Release() {
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index e33f1e973..fbb93ee49 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -28,7 +28,7 @@ public:
28 } 28 }
29 29
30 /// Creates a new internal OpenGL resource and stores the handle 30 /// Creates a new internal OpenGL resource and stores the handle
31 void Create(); 31 void Create(GLenum target);
32 32
33 /// Deletes the internal OpenGL resource 33 /// Deletes the internal OpenGL resource
34 void Release(); 34 void Release();
@@ -101,15 +101,15 @@ public:
101 } 101 }
102 102
103 template <typename... T> 103 template <typename... T>
104 void Create(bool separable_program, T... shaders) { 104 void Create(bool separable_program, bool hint_retrievable, T... shaders) {
105 if (handle != 0) 105 if (handle != 0)
106 return; 106 return;
107 handle = GLShader::LoadProgram(separable_program, shaders...); 107 handle = GLShader::LoadProgram(separable_program, hint_retrievable, shaders...);
108 } 108 }
109 109
110 /// Creates a new internal OpenGL resource and stores the handle 110 /// Creates a new internal OpenGL resource and stores the handle
111 void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader, 111 void CreateFromSource(const char* vert_shader, const char* geo_shader, const char* frag_shader,
112 bool separable_program = false); 112 bool separable_program = false, bool hint_retrievable = false);
113 113
114 /// Deletes the internal OpenGL resource 114 /// Deletes the internal OpenGL resource
115 void Release(); 115 void Release();
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 90eda7814..4883e4f62 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -11,6 +11,7 @@
11#include "video_core/renderer_opengl/gl_rasterizer.h" 11#include "video_core/renderer_opengl/gl_rasterizer.h"
12#include "video_core/renderer_opengl/gl_shader_cache.h" 12#include "video_core/renderer_opengl/gl_shader_cache.h"
13#include "video_core/renderer_opengl/gl_shader_decompiler.h" 13#include "video_core/renderer_opengl/gl_shader_decompiler.h"
14#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
14#include "video_core/renderer_opengl/gl_shader_manager.h" 15#include "video_core/renderer_opengl/gl_shader_manager.h"
15#include "video_core/renderer_opengl/utils.h" 16#include "video_core/renderer_opengl/utils.h"
16#include "video_core/shader/shader_ir.h" 17#include "video_core/shader/shader_ir.h"
@@ -19,16 +20,29 @@ namespace OpenGL {
19 20
20using VideoCommon::Shader::ProgramCode; 21using VideoCommon::Shader::ProgramCode;
21 22
23// One UBO is always reserved for emulation values
24constexpr u32 RESERVED_UBOS = 1;
25
26struct UnspecializedShader {
27 std::string code;
28 GLShader::ShaderEntries entries;
29 Maxwell::ShaderProgram program_type;
30};
31
32namespace {
33
22/// Gets the address for the specified shader stage program 34/// Gets the address for the specified shader stage program
23static VAddr GetShaderAddress(Maxwell::ShaderProgram program) { 35VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
24 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 36 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
25 const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)]; 37 const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)];
26 return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() + 38 const auto address = gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
27 shader_config.offset); 39 shader_config.offset);
40 ASSERT_MSG(address, "Invalid GPU address");
41 return *address;
28} 42}
29 43
30/// Gets the shader program code from memory for the specified address 44/// Gets the shader program code from memory for the specified address
31static ProgramCode GetShaderCode(VAddr addr) { 45ProgramCode GetShaderCode(VAddr addr) {
32 ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); 46 ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
33 Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); 47 Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64));
34 return program_code; 48 return program_code;
@@ -49,38 +63,196 @@ constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) {
49 } 63 }
50} 64}
51 65
52CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) 66/// Gets if the current instruction offset is a scheduler instruction
53 : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} { 67constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
68 // Sched instructions appear once every 4 instructions.
69 constexpr std::size_t SchedPeriod = 4;
70 const std::size_t absolute_offset = offset - main_offset;
71 return (absolute_offset % SchedPeriod) == 0;
72}
54 73
55 GLShader::ProgramResult program_result; 74/// Describes primitive behavior on geometry shaders
75constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
76 switch (primitive_mode) {
77 case GL_POINTS:
78 return {"points", "Points", 1};
79 case GL_LINES:
80 case GL_LINE_STRIP:
81 return {"lines", "Lines", 2};
82 case GL_LINES_ADJACENCY:
83 case GL_LINE_STRIP_ADJACENCY:
84 return {"lines_adjacency", "LinesAdj", 4};
85 case GL_TRIANGLES:
86 case GL_TRIANGLE_STRIP:
87 case GL_TRIANGLE_FAN:
88 return {"triangles", "Triangles", 3};
89 case GL_TRIANGLES_ADJACENCY:
90 case GL_TRIANGLE_STRIP_ADJACENCY:
91 return {"triangles_adjacency", "TrianglesAdj", 6};
92 default:
93 return {"points", "Invalid", 1};
94 }
95}
56 96
57 switch (program_type) { 97/// Calculates the size of a program stream
58 case Maxwell::ShaderProgram::VertexA: 98std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
99 constexpr std::size_t start_offset = 10;
100 std::size_t offset = start_offset;
101 std::size_t size = start_offset * sizeof(u64);
102 while (offset < program.size()) {
103 const u64 instruction = program[offset];
104 if (!IsSchedInstruction(offset, start_offset)) {
105 if (instruction == 0 || (instruction >> 52) == 0x50b) {
106 // End on Maxwell's "nop" instruction
107 break;
108 }
109 }
110 size += sizeof(u64);
111 offset++;
112 }
113 // The last instruction is included in the program size
114 return std::min(size + sizeof(u64), program.size() * sizeof(u64));
115}
116
117/// Hashes one (or two) program streams
118u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code,
119 const ProgramCode& code_b) {
120 u64 unique_identifier =
121 Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code));
122 if (program_type != Maxwell::ShaderProgram::VertexA) {
123 return unique_identifier;
124 }
125 // VertexA programs include two programs
126
127 std::size_t seed = 0;
128 boost::hash_combine(seed, unique_identifier);
129
130 const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()),
131 CalculateProgramSize(code_b));
132 boost::hash_combine(seed, identifier_b);
133 return static_cast<u64>(seed);
134}
135
136/// Creates an unspecialized program from code streams
137GLShader::ProgramResult CreateProgram(Maxwell::ShaderProgram program_type, ProgramCode program_code,
138 ProgramCode program_code_b) {
139 GLShader::ShaderSetup setup(program_code);
140 if (program_type == Maxwell::ShaderProgram::VertexA) {
59 // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. 141 // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
60 // Conventional HW does not support this, so we combine VertexA and VertexB into one 142 // Conventional HW does not support this, so we combine VertexA and VertexB into one
61 // stage here. 143 // stage here.
62 setup.SetProgramB(GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); 144 setup.SetProgramB(program_code_b);
145 }
146 setup.program.unique_identifier =
147 GetUniqueIdentifier(program_type, program_code, program_code_b);
148
149 switch (program_type) {
150 case Maxwell::ShaderProgram::VertexA:
63 case Maxwell::ShaderProgram::VertexB: 151 case Maxwell::ShaderProgram::VertexB:
64 CalculateProperties(); 152 return GLShader::GenerateVertexShader(setup);
65 program_result = GLShader::GenerateVertexShader(setup);
66 break;
67 case Maxwell::ShaderProgram::Geometry: 153 case Maxwell::ShaderProgram::Geometry:
68 CalculateProperties(); 154 return GLShader::GenerateGeometryShader(setup);
69 program_result = GLShader::GenerateGeometryShader(setup);
70 break;
71 case Maxwell::ShaderProgram::Fragment: 155 case Maxwell::ShaderProgram::Fragment:
72 CalculateProperties(); 156 return GLShader::GenerateFragmentShader(setup);
73 program_result = GLShader::GenerateFragmentShader(setup);
74 break;
75 default: 157 default:
76 LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); 158 LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type));
77 UNREACHABLE(); 159 UNREACHABLE();
160 return {};
161 }
162}
163
164CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
165 Maxwell::ShaderProgram program_type, BaseBindings base_bindings,
166 GLenum primitive_mode, bool hint_retrievable = false) {
167 std::string source = "#version 430 core\n";
168 source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
169
170 for (const auto& cbuf : entries.const_buffers) {
171 source +=
172 fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
173 }
174 for (const auto& gmem : entries.global_memory_entries) {
175 source += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
176 gmem.GetCbufOffset(), base_bindings.gmem++);
177 }
178 for (const auto& sampler : entries.samplers) {
179 source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
180 base_bindings.sampler++);
181 }
182
183 if (program_type == Maxwell::ShaderProgram::Geometry) {
184 const auto [glsl_topology, debug_name, max_vertices] =
185 GetPrimitiveDescription(primitive_mode);
186
187 source += "layout (" + std::string(glsl_topology) + ") in;\n";
188 source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
189 }
190
191 source += code;
192
193 OGLShader shader;
194 shader.Create(source.c_str(), GetShaderType(program_type));
195
196 auto program = std::make_shared<OGLProgram>();
197 program->Create(true, hint_retrievable, shader.handle);
198 return program;
199}
200
201std::set<GLenum> GetSupportedFormats() {
202 std::set<GLenum> supported_formats;
203
204 GLint num_formats{};
205 glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
206
207 std::vector<GLint> formats(num_formats);
208 glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
209
210 for (const GLint format : formats)
211 supported_formats.insert(static_cast<GLenum>(format));
212 return supported_formats;
213}
214
215} // namespace
216
217CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
218 ShaderDiskCacheOpenGL& disk_cache,
219 const PrecompiledPrograms& precompiled_programs,
220 ProgramCode&& program_code, ProgramCode&& program_code_b)
221 : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
222 disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
223
224 const std::size_t code_size = CalculateProgramSize(program_code);
225 const std::size_t code_size_b =
226 program_code_b.empty() ? 0 : CalculateProgramSize(program_code_b);
227
228 GLShader::ProgramResult program_result =
229 CreateProgram(program_type, program_code, program_code_b);
230 if (program_result.first.empty()) {
231 // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
78 return; 232 return;
79 } 233 }
80 234
81 code = program_result.first; 235 code = program_result.first;
82 entries = program_result.second; 236 entries = program_result.second;
83 shader_length = entries.shader_length; 237 shader_length = entries.shader_length;
238
239 const ShaderDiskCacheRaw raw(unique_identifier, program_type,
240 static_cast<u32>(code_size / sizeof(u64)),
241 static_cast<u32>(code_size_b / sizeof(u64)),
242 std::move(program_code), std::move(program_code_b));
243 disk_cache.SaveRaw(raw);
244}
245
246CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
247 ShaderDiskCacheOpenGL& disk_cache,
248 const PrecompiledPrograms& precompiled_programs,
249 GLShader::ProgramResult result)
250 : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type},
251 disk_cache{disk_cache}, precompiled_programs{precompiled_programs} {
252
253 code = std::move(result.first);
254 entries = result.second;
255 shader_length = entries.shader_length;
84} 256}
85 257
86std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode, 258std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode,
@@ -92,136 +264,222 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
92 const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings); 264 const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings);
93 auto& program = entry->second; 265 auto& program = entry->second;
94 if (is_cache_miss) { 266 if (is_cache_miss) {
95 std::string source = AllocateBindings(base_bindings); 267 program = TryLoadProgram(primitive_mode, base_bindings);
96 source += code; 268 if (!program) {
269 program =
270 SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
271 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
272 }
97 273
98 OGLShader shader; 274 LabelGLObject(GL_PROGRAM, program->handle, addr);
99 shader.Create(source.c_str(), GetShaderType(program_type));
100 program.Create(true, shader.handle);
101 LabelGLObject(GL_PROGRAM, program.handle, addr);
102 } 275 }
103 276
104 handle = program.handle; 277 handle = program->handle;
105 } 278 }
106 279
107 // Add const buffer and samplers offset reserved by this shader. One UBO binding is reserved for 280 base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS;
108 // emulation values
109 base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + 1;
110 base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); 281 base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
111 base_bindings.sampler += static_cast<u32>(entries.samplers.size()); 282 base_bindings.sampler += static_cast<u32>(entries.samplers.size());
112 283
113 return {handle, base_bindings}; 284 return {handle, base_bindings};
114} 285}
115 286
116std::string CachedShader::AllocateBindings(BaseBindings base_bindings) {
117 std::string code = "#version 430 core\n";
118 code += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
119
120 for (const auto& cbuf : entries.const_buffers) {
121 code += fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
122 }
123
124 for (const auto& gmem : entries.global_memory_entries) {
125 code += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
126 gmem.GetCbufOffset(), base_bindings.gmem++);
127 }
128
129 for (const auto& sampler : entries.samplers) {
130 code += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
131 base_bindings.sampler++);
132 }
133
134 return code;
135}
136
137GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) { 287GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) {
138 const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings); 288 const auto [entry, is_cache_miss] = geometry_programs.try_emplace(base_bindings);
139 auto& programs = entry->second; 289 auto& programs = entry->second;
140 290
141 switch (primitive_mode) { 291 switch (primitive_mode) {
142 case GL_POINTS: 292 case GL_POINTS:
143 return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints"); 293 return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
144 case GL_LINES: 294 case GL_LINES:
145 case GL_LINE_STRIP: 295 case GL_LINE_STRIP:
146 return LazyGeometryProgram(programs.lines, base_bindings, "lines", 2, "ShaderLines"); 296 return LazyGeometryProgram(programs.lines, base_bindings, primitive_mode);
147 case GL_LINES_ADJACENCY: 297 case GL_LINES_ADJACENCY:
148 case GL_LINE_STRIP_ADJACENCY: 298 case GL_LINE_STRIP_ADJACENCY:
149 return LazyGeometryProgram(programs.lines_adjacency, base_bindings, "lines_adjacency", 4, 299 return LazyGeometryProgram(programs.lines_adjacency, base_bindings, primitive_mode);
150 "ShaderLinesAdjacency");
151 case GL_TRIANGLES: 300 case GL_TRIANGLES:
152 case GL_TRIANGLE_STRIP: 301 case GL_TRIANGLE_STRIP:
153 case GL_TRIANGLE_FAN: 302 case GL_TRIANGLE_FAN:
154 return LazyGeometryProgram(programs.triangles, base_bindings, "triangles", 3, 303 return LazyGeometryProgram(programs.triangles, base_bindings, primitive_mode);
155 "ShaderTriangles");
156 case GL_TRIANGLES_ADJACENCY: 304 case GL_TRIANGLES_ADJACENCY:
157 case GL_TRIANGLE_STRIP_ADJACENCY: 305 case GL_TRIANGLE_STRIP_ADJACENCY:
158 return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, 306 return LazyGeometryProgram(programs.triangles_adjacency, base_bindings, primitive_mode);
159 "triangles_adjacency", 6, "ShaderTrianglesAdjacency");
160 default: 307 default:
161 UNREACHABLE_MSG("Unknown primitive mode."); 308 UNREACHABLE_MSG("Unknown primitive mode.");
162 return LazyGeometryProgram(programs.points, base_bindings, "points", 1, "ShaderPoints"); 309 return LazyGeometryProgram(programs.points, base_bindings, primitive_mode);
163 } 310 }
164} 311}
165 312
166GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings, 313GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
167 const std::string& glsl_topology, u32 max_vertices, 314 GLenum primitive_mode) {
168 const std::string& debug_name) { 315 if (target_program) {
169 if (target_program.handle != 0) { 316 return target_program->handle;
170 return target_program.handle; 317 }
318 const auto [glsl_name, debug_name, vertices] = GetPrimitiveDescription(primitive_mode);
319 target_program = TryLoadProgram(primitive_mode, base_bindings);
320 if (!target_program) {
321 target_program =
322 SpecializeShader(code, entries, program_type, base_bindings, primitive_mode);
323 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
171 } 324 }
172 std::string source = AllocateBindings(base_bindings);
173 source += "layout (" + glsl_topology + ") in;\n";
174 source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
175 source += code;
176 325
177 OGLShader shader; 326 LabelGLObject(GL_PROGRAM, target_program->handle, addr, debug_name);
178 shader.Create(source.c_str(), GL_GEOMETRY_SHADER); 327
179 target_program.Create(true, shader.handle); 328 return target_program->handle;
180 LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name);
181 return target_program.handle;
182}; 329};
183 330
184static bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { 331CachedProgram CachedShader::TryLoadProgram(GLenum primitive_mode,
185 // sched instructions appear once every 4 instructions. 332 BaseBindings base_bindings) const {
186 static constexpr std::size_t SchedPeriod = 4; 333 const auto found = precompiled_programs.find(GetUsage(primitive_mode, base_bindings));
187 const std::size_t absolute_offset = offset - main_offset; 334 if (found == precompiled_programs.end()) {
188 return (absolute_offset % SchedPeriod) == 0; 335 return {};
336 }
337 return found->second;
189} 338}
190 339
191static std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { 340ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
192 constexpr std::size_t start_offset = 10; 341 BaseBindings base_bindings) const {
193 std::size_t offset = start_offset; 342 return {unique_identifier, base_bindings, primitive_mode};
194 std::size_t size = start_offset * sizeof(u64); 343}
195 while (offset < program.size()) { 344
196 const u64 inst = program[offset]; 345ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system)
197 if (!IsSchedInstruction(offset, start_offset)) { 346 : RasterizerCache{rasterizer}, disk_cache{system} {}
198 if (inst == 0 || (inst >> 52) == 0x50b) { 347
199 break; 348void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
349 const VideoCore::DiskResourceLoadCallback& callback) {
350 const auto transferable = disk_cache.LoadTransferable();
351 if (!transferable) {
352 return;
353 }
354 const auto [raws, usages] = *transferable;
355
356 auto [decompiled, dumps] = disk_cache.LoadPrecompiled();
357
358 const auto supported_formats{GetSupportedFormats()};
359 const auto unspecialized{
360 GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)};
361 if (stop_loading)
362 return;
363
364 // Build shaders
365 if (callback)
366 callback(VideoCore::LoadCallbackStage::Build, 0, usages.size());
367 for (std::size_t i = 0; i < usages.size(); ++i) {
368 if (stop_loading)
369 return;
370
371 const auto& usage{usages[i]};
372 LOG_INFO(Render_OpenGL, "Building shader {:016x} ({} of {})", usage.unique_identifier,
373 i + 1, usages.size());
374
375 const auto& unspec{unspecialized.at(usage.unique_identifier)};
376 const auto dump_it = dumps.find(usage);
377
378 CachedProgram shader;
379 if (dump_it != dumps.end()) {
380 // If the shader is dumped, attempt to load it with
381 shader = GeneratePrecompiledProgram(dump_it->second, supported_formats);
382 if (!shader) {
383 // Invalidate the precompiled cache if a shader dumped shader was rejected
384 disk_cache.InvalidatePrecompiled();
385 dumps.clear();
200 } 386 }
201 } 387 }
202 size += sizeof(inst); 388 if (!shader) {
203 offset++; 389 shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type,
390 usage.bindings, usage.primitive, true);
391 }
392 precompiled_programs.insert({usage, std::move(shader)});
393
394 if (callback)
395 callback(VideoCore::LoadCallbackStage::Build, i + 1, usages.size());
396 }
397
398 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before
399 // precompiling them
400
401 for (std::size_t i = 0; i < usages.size(); ++i) {
402 const auto& usage{usages[i]};
403 if (dumps.find(usage) == dumps.end()) {
404 const auto& program = precompiled_programs.at(usage);
405 disk_cache.SaveDump(usage, program->handle);
406 }
204 } 407 }
205 return size;
206} 408}
207 409
208void CachedShader::CalculateProperties() { 410CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
209 setup.program.real_size = CalculateProgramSize(setup.program.code); 411 const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats) {
210 setup.program.real_size_b = 0; 412
211 setup.program.unique_identifier = Common::CityHash64( 413 if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
212 reinterpret_cast<const char*>(setup.program.code.data()), setup.program.real_size); 414 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
213 if (program_type == Maxwell::ShaderProgram::VertexA) { 415 return {};
214 std::size_t seed = 0;
215 boost::hash_combine(seed, setup.program.unique_identifier);
216 setup.program.real_size_b = CalculateProgramSize(setup.program.code_b);
217 const u64 identifier_b = Common::CityHash64(
218 reinterpret_cast<const char*>(setup.program.code_b.data()), setup.program.real_size_b);
219 boost::hash_combine(seed, identifier_b);
220 setup.program.unique_identifier = static_cast<u64>(seed);
221 } 416 }
417
418 CachedProgram shader = std::make_shared<OGLProgram>();
419 shader->handle = glCreateProgram();
420 glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
421 glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(),
422 static_cast<GLsizei>(dump.binary.size()));
423
424 GLint link_status{};
425 glGetProgramiv(shader->handle, GL_LINK_STATUS, &link_status);
426 if (link_status == GL_FALSE) {
427 LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing");
428 return {};
429 }
430
431 return shader;
222} 432}
223 433
224ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer) : RasterizerCache{rasterizer} {} 434std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders(
435 const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
436 const std::vector<ShaderDiskCacheRaw>& raws,
437 const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) {
438 std::unordered_map<u64, UnspecializedShader> unspecialized;
439
440 if (callback)
441 callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
442
443 for (std::size_t i = 0; i < raws.size(); ++i) {
444 if (stop_loading)
445 return {};
446
447 const auto& raw{raws[i]};
448 const u64 unique_identifier = raw.GetUniqueIdentifier();
449 const u64 calculated_hash =
450 GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB());
451 if (unique_identifier != calculated_hash) {
452 LOG_ERROR(
453 Render_OpenGL,
454 "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache",
455 raw.GetUniqueIdentifier(), calculated_hash);
456 disk_cache.InvalidateTransferable();
457 return {};
458 }
459
460 GLShader::ProgramResult result;
461 if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) {
462 // If it's stored in the precompiled file, avoid decompiling it here
463 const auto& stored_decompiled{it->second};
464 result = {stored_decompiled.code, stored_decompiled.entries};
465 } else {
466 // Otherwise decompile the shader at boot and save the result to the decompiled file
467 result =
468 CreateProgram(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB());
469 disk_cache.SaveDecompiled(unique_identifier, result.first, result.second);
470 }
471
472 precompiled_shaders.insert({unique_identifier, result});
473
474 unspecialized.insert(
475 {raw.GetUniqueIdentifier(),
476 {std::move(result.first), std::move(result.second), raw.GetProgramType()}});
477
478 if (callback)
479 callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
480 }
481 return unspecialized;
482}
225 483
226Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { 484Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
227 if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) { 485 if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) {
@@ -235,7 +493,23 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
235 493
236 if (!shader) { 494 if (!shader) {
237 // No shader found - create a new one 495 // No shader found - create a new one
238 shader = std::make_shared<CachedShader>(program_addr, program); 496 ProgramCode program_code = GetShaderCode(program_addr);
497 ProgramCode program_code_b;
498 if (program == Maxwell::ShaderProgram::VertexA) {
499 program_code_b = GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB));
500 }
501 const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
502
503 const auto found = precompiled_shaders.find(unique_identifier);
504 if (found != precompiled_shaders.end()) {
505 shader =
506 std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache,
507 precompiled_programs, found->second);
508 } else {
509 shader = std::make_shared<CachedShader>(
510 program_addr, unique_identifier, program, disk_cache, precompiled_programs,
511 std::move(program_code), std::move(program_code_b));
512 }
239 Register(shader); 513 Register(shader);
240 } 514 }
241 515
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 904d15dd0..97eed192f 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -5,40 +5,49 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <map>
9#include <memory> 8#include <memory>
9#include <set>
10#include <tuple> 10#include <tuple>
11#include <unordered_map>
11 12
12#include <glad/glad.h> 13#include <glad/glad.h>
13 14
14#include "common/assert.h" 15#include "common/assert.h"
15#include "common/common_types.h" 16#include "common/common_types.h"
16#include "video_core/rasterizer_cache.h" 17#include "video_core/rasterizer_cache.h"
18#include "video_core/renderer_base.h"
17#include "video_core/renderer_opengl/gl_resource_manager.h" 19#include "video_core/renderer_opengl/gl_resource_manager.h"
18#include "video_core/renderer_opengl/gl_shader_decompiler.h" 20#include "video_core/renderer_opengl/gl_shader_decompiler.h"
21#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
19#include "video_core/renderer_opengl/gl_shader_gen.h" 22#include "video_core/renderer_opengl/gl_shader_gen.h"
20 23
24namespace Core {
25class System;
26} // namespace Core
27
21namespace OpenGL { 28namespace OpenGL {
22 29
23class CachedShader; 30class CachedShader;
24class RasterizerOpenGL; 31class RasterizerOpenGL;
32struct UnspecializedShader;
25 33
26using Shader = std::shared_ptr<CachedShader>; 34using Shader = std::shared_ptr<CachedShader>;
35using CachedProgram = std::shared_ptr<OGLProgram>;
27using Maxwell = Tegra::Engines::Maxwell3D::Regs; 36using Maxwell = Tegra::Engines::Maxwell3D::Regs;
28 37using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>;
29struct BaseBindings { 38using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
30 u32 cbuf{};
31 u32 gmem{};
32 u32 sampler{};
33
34 bool operator<(const BaseBindings& rhs) const {
35 return std::tie(cbuf, gmem, sampler) < std::tie(rhs.cbuf, rhs.gmem, rhs.sampler);
36 }
37};
38 39
39class CachedShader final : public RasterizerCacheObject { 40class CachedShader final : public RasterizerCacheObject {
40public: 41public:
41 CachedShader(VAddr addr, Maxwell::ShaderProgram program_type); 42 explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
43 ShaderDiskCacheOpenGL& disk_cache,
44 const PrecompiledPrograms& precompiled_programs,
45 ProgramCode&& program_code, ProgramCode&& program_code_b);
46
47 explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type,
48 ShaderDiskCacheOpenGL& disk_cache,
49 const PrecompiledPrograms& precompiled_programs,
50 GLShader::ProgramResult result);
42 51
43 VAddr GetAddr() const override { 52 VAddr GetAddr() const override {
44 return addr; 53 return addr;
@@ -65,49 +74,67 @@ private:
65 // declared by the hardware. Workaround this issue by generating a different shader per input 74 // declared by the hardware. Workaround this issue by generating a different shader per input
66 // topology class. 75 // topology class.
67 struct GeometryPrograms { 76 struct GeometryPrograms {
68 OGLProgram points; 77 CachedProgram points;
69 OGLProgram lines; 78 CachedProgram lines;
70 OGLProgram lines_adjacency; 79 CachedProgram lines_adjacency;
71 OGLProgram triangles; 80 CachedProgram triangles;
72 OGLProgram triangles_adjacency; 81 CachedProgram triangles_adjacency;
73 }; 82 };
74 83
75 std::string AllocateBindings(BaseBindings base_bindings);
76
77 GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings); 84 GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings);
78 85
79 /// Generates a geometry shader or returns one that already exists. 86 /// Generates a geometry shader or returns one that already exists.
80 GLuint LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings, 87 GLuint LazyGeometryProgram(CachedProgram& target_program, BaseBindings base_bindings,
81 const std::string& glsl_topology, u32 max_vertices, 88 GLenum primitive_mode);
82 const std::string& debug_name); 89
90 CachedProgram TryLoadProgram(GLenum primitive_mode, BaseBindings base_bindings) const;
83 91
84 void CalculateProperties(); 92 ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
85 93
86 VAddr addr{}; 94 VAddr addr{};
87 std::size_t shader_length{}; 95 u64 unique_identifier{};
88 Maxwell::ShaderProgram program_type{}; 96 Maxwell::ShaderProgram program_type{};
89 GLShader::ShaderSetup setup; 97 ShaderDiskCacheOpenGL& disk_cache;
98 const PrecompiledPrograms& precompiled_programs;
99
100 std::size_t shader_length{};
90 GLShader::ShaderEntries entries; 101 GLShader::ShaderEntries entries;
91 102
92 std::string code; 103 std::string code;
93 104
94 std::map<BaseBindings, OGLProgram> programs; 105 std::unordered_map<BaseBindings, CachedProgram> programs;
95 std::map<BaseBindings, GeometryPrograms> geometry_programs; 106 std::unordered_map<BaseBindings, GeometryPrograms> geometry_programs;
96 107
97 std::map<u32, GLuint> cbuf_resource_cache; 108 std::unordered_map<u32, GLuint> cbuf_resource_cache;
98 std::map<u32, GLuint> gmem_resource_cache; 109 std::unordered_map<u32, GLuint> gmem_resource_cache;
99 std::map<u32, GLint> uniform_cache; 110 std::unordered_map<u32, GLint> uniform_cache;
100}; 111};
101 112
102class ShaderCacheOpenGL final : public RasterizerCache<Shader> { 113class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
103public: 114public:
104 explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer); 115 explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system);
116
117 /// Loads disk cache for the current game
118 void LoadDiskCache(const std::atomic_bool& stop_loading,
119 const VideoCore::DiskResourceLoadCallback& callback);
105 120
106 /// Gets the current specified shader stage program 121 /// Gets the current specified shader stage program
107 Shader GetStageProgram(Maxwell::ShaderProgram program); 122 Shader GetStageProgram(Maxwell::ShaderProgram program);
108 123
109private: 124private:
125 std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders(
126 const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
127 const std::vector<ShaderDiskCacheRaw>& raws,
128 const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled);
129
130 CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
131 const std::set<GLenum>& supported_formats);
132
110 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 133 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
134
135 ShaderDiskCacheOpenGL disk_cache;
136 PrecompiledShaders precompiled_shaders;
137 PrecompiledPrograms precompiled_programs;
111}; 138};
112 139
113} // namespace OpenGL 140} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 8e3c20090..70e124dc4 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -193,15 +193,14 @@ public:
193 ShaderEntries GetShaderEntries() const { 193 ShaderEntries GetShaderEntries() const {
194 ShaderEntries entries; 194 ShaderEntries entries;
195 for (const auto& cbuf : ir.GetConstantBuffers()) { 195 for (const auto& cbuf : ir.GetConstantBuffers()) {
196 entries.const_buffers.emplace_back(cbuf.second, stage, GetConstBufferBlock(cbuf.first), 196 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
197 cbuf.first); 197 cbuf.first);
198 } 198 }
199 for (const auto& sampler : ir.GetSamplers()) { 199 for (const auto& sampler : ir.GetSamplers()) {
200 entries.samplers.emplace_back(sampler, stage, GetSampler(sampler)); 200 entries.samplers.emplace_back(sampler);
201 } 201 }
202 for (const auto& gmem : ir.GetGlobalMemoryBases()) { 202 for (const auto& gmem : ir.GetGlobalMemoryBases()) {
203 entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset, stage, 203 entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset);
204 GetGlobalMemoryBlock(gmem));
205 } 204 }
206 entries.clip_distances = ir.GetClipDistances(); 205 entries.clip_distances = ir.GetClipDistances();
207 entries.shader_length = ir.GetLength(); 206 entries.shader_length = ir.GetLength();
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 0856a1361..72aca4938 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <set>
8#include <string> 9#include <string>
9#include <utility> 10#include <utility>
10#include <vector> 11#include <vector>
@@ -18,56 +19,29 @@ class ShaderIR;
18 19
19namespace OpenGL::GLShader { 20namespace OpenGL::GLShader {
20 21
22struct ShaderEntries;
23
21using Maxwell = Tegra::Engines::Maxwell3D::Regs; 24using Maxwell = Tegra::Engines::Maxwell3D::Regs;
25using ProgramResult = std::pair<std::string, ShaderEntries>;
26using SamplerEntry = VideoCommon::Shader::Sampler;
22 27
23class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer { 28class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
24public: 29public:
25 explicit ConstBufferEntry(const VideoCommon::Shader::ConstBuffer& entry, 30 explicit ConstBufferEntry(u32 max_offset, bool is_indirect, u32 index)
26 Maxwell::ShaderStage stage, const std::string& name, u32 index) 31 : VideoCommon::Shader::ConstBuffer{max_offset, is_indirect}, index{index} {}
27 : VideoCommon::Shader::ConstBuffer{entry}, stage{stage}, name{name}, index{index} {}
28
29 const std::string& GetName() const {
30 return name;
31 }
32
33 Maxwell::ShaderStage GetStage() const {
34 return stage;
35 }
36 32
37 u32 GetIndex() const { 33 u32 GetIndex() const {
38 return index; 34 return index;
39 } 35 }
40 36
41private: 37private:
42 std::string name;
43 Maxwell::ShaderStage stage{};
44 u32 index{}; 38 u32 index{};
45}; 39};
46 40
47class SamplerEntry : public VideoCommon::Shader::Sampler {
48public:
49 explicit SamplerEntry(const VideoCommon::Shader::Sampler& entry, Maxwell::ShaderStage stage,
50 const std::string& name)
51 : VideoCommon::Shader::Sampler{entry}, stage{stage}, name{name} {}
52
53 const std::string& GetName() const {
54 return name;
55 }
56
57 Maxwell::ShaderStage GetStage() const {
58 return stage;
59 }
60
61private:
62 std::string name;
63 Maxwell::ShaderStage stage{};
64};
65
66class GlobalMemoryEntry { 41class GlobalMemoryEntry {
67public: 42public:
68 explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, Maxwell::ShaderStage stage, 43 explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset)
69 std::string name) 44 : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset} {}
70 : cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, stage{stage}, name{std::move(name)} {}
71 45
72 u32 GetCbufIndex() const { 46 u32 GetCbufIndex() const {
73 return cbuf_index; 47 return cbuf_index;
@@ -77,19 +51,9 @@ public:
77 return cbuf_offset; 51 return cbuf_offset;
78 } 52 }
79 53
80 const std::string& GetName() const {
81 return name;
82 }
83
84 Maxwell::ShaderStage GetStage() const {
85 return stage;
86 }
87
88private: 54private:
89 u32 cbuf_index{}; 55 u32 cbuf_index{};
90 u32 cbuf_offset{}; 56 u32 cbuf_offset{};
91 Maxwell::ShaderStage stage{};
92 std::string name;
93}; 57};
94 58
95struct ShaderEntries { 59struct ShaderEntries {
@@ -100,8 +64,6 @@ struct ShaderEntries {
100 std::size_t shader_length{}; 64 std::size_t shader_length{};
101}; 65};
102 66
103using ProgramResult = std::pair<std::string, ShaderEntries>;
104
105std::string GetCommonDeclarations(); 67std::string GetCommonDeclarations();
106 68
107ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage, 69ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage,
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
new file mode 100644
index 000000000..81882822b
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -0,0 +1,656 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstring>
8#include <fmt/format.h>
9#include <lz4.h>
10
11#include "common/assert.h"
12#include "common/common_paths.h"
13#include "common/common_types.h"
14#include "common/file_util.h"
15#include "common/logging/log.h"
16#include "common/scm_rev.h"
17
18#include "core/core.h"
19#include "core/hle/kernel/process.h"
20#include "core/settings.h"
21
22#include "video_core/renderer_opengl/gl_shader_cache.h"
23#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
24
25namespace OpenGL {
26
27using ShaderCacheVersionHash = std::array<u8, 64>;
28
29enum class TransferableEntryKind : u32 {
30 Raw,
31 Usage,
32};
33
34enum class PrecompiledEntryKind : u32 {
35 Decompiled,
36 Dump,
37};
38
39constexpr u32 NativeVersion = 1;
40
41// Making sure sizes doesn't change by accident
42static_assert(sizeof(BaseBindings) == 12);
43static_assert(sizeof(ShaderDiskCacheUsage) == 24);
44
45namespace {
46
47ShaderCacheVersionHash GetShaderCacheVersionHash() {
48 ShaderCacheVersionHash hash{};
49 const std::size_t length = std::min(std::strlen(Common::g_shader_cache_version), hash.size());
50 std::memcpy(hash.data(), Common::g_shader_cache_version, length);
51 return hash;
52}
53
54template <typename T>
55std::vector<u8> CompressData(const T* source, std::size_t source_size) {
56 if (source_size > LZ4_MAX_INPUT_SIZE) {
57 // Source size exceeds LZ4 maximum input size
58 return {};
59 }
60 const auto source_size_int = static_cast<int>(source_size);
61 const int max_compressed_size = LZ4_compressBound(source_size_int);
62 std::vector<u8> compressed(max_compressed_size);
63 const int compressed_size = LZ4_compress_default(reinterpret_cast<const char*>(source),
64 reinterpret_cast<char*>(compressed.data()),
65 source_size_int, max_compressed_size);
66 if (compressed_size <= 0) {
67 // Compression failed
68 return {};
69 }
70 compressed.resize(compressed_size);
71 return compressed;
72}
73
74std::vector<u8> DecompressData(const std::vector<u8>& compressed, std::size_t uncompressed_size) {
75 std::vector<u8> uncompressed(uncompressed_size);
76 const int size_check = LZ4_decompress_safe(reinterpret_cast<const char*>(compressed.data()),
77 reinterpret_cast<char*>(uncompressed.data()),
78 static_cast<int>(compressed.size()),
79 static_cast<int>(uncompressed.size()));
80 if (static_cast<int>(uncompressed_size) != size_check) {
81 // Decompression failed
82 return {};
83 }
84 return uncompressed;
85}
86
87} // namespace
88
89ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
90 u32 program_code_size, u32 program_code_size_b,
91 ProgramCode program_code, ProgramCode program_code_b)
92 : unique_identifier{unique_identifier}, program_type{program_type},
93 program_code_size{program_code_size}, program_code_size_b{program_code_size_b},
94 program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {}
95
96ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default;
97
98ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default;
99
100bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
101 if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) ||
102 file.ReadBytes(&program_type, sizeof(u32)) != sizeof(u32)) {
103 return false;
104 }
105 u32 program_code_size{};
106 u32 program_code_size_b{};
107 if (file.ReadBytes(&program_code_size, sizeof(u32)) != sizeof(u32) ||
108 file.ReadBytes(&program_code_size_b, sizeof(u32)) != sizeof(u32)) {
109 return false;
110 }
111
112 program_code.resize(program_code_size);
113 program_code_b.resize(program_code_size_b);
114
115 if (file.ReadArray(program_code.data(), program_code_size) != program_code_size)
116 return false;
117
118 if (HasProgramA() &&
119 file.ReadArray(program_code_b.data(), program_code_size_b) != program_code_size_b) {
120 return false;
121 }
122 return true;
123}
124
125bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
126 if (file.WriteObject(unique_identifier) != 1 ||
127 file.WriteObject(static_cast<u32>(program_type)) != 1 ||
128 file.WriteObject(program_code_size) != 1 || file.WriteObject(program_code_size_b) != 1) {
129 return false;
130 }
131
132 if (file.WriteArray(program_code.data(), program_code_size) != program_code_size)
133 return false;
134
135 if (HasProgramA() &&
136 file.WriteArray(program_code_b.data(), program_code_size_b) != program_code_size_b) {
137 return false;
138 }
139 return true;
140}
141
142ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
143
144std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
145ShaderDiskCacheOpenGL::LoadTransferable() {
146 // Skip games without title id
147 const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
148 if (!Settings::values.use_disk_shader_cache || !has_title_id)
149 return {};
150 tried_to_load = true;
151
152 FileUtil::IOFile file(GetTransferablePath(), "rb");
153 if (!file.IsOpen()) {
154 LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}",
155 GetTitleID());
156 return {};
157 }
158
159 u32 version{};
160 if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
161 LOG_ERROR(Render_OpenGL,
162 "Failed to get transferable cache version for title id={} - skipping",
163 GetTitleID());
164 return {};
165 }
166
167 if (version < NativeVersion) {
168 LOG_INFO(Render_OpenGL, "Transferable shader cache is old - removing");
169 file.Close();
170 InvalidateTransferable();
171 return {};
172 }
173 if (version > NativeVersion) {
174 LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
175 "of the emulator - skipping");
176 return {};
177 }
178
179 // Version is valid, load the shaders
180 std::vector<ShaderDiskCacheRaw> raws;
181 std::vector<ShaderDiskCacheUsage> usages;
182 while (file.Tell() < file.GetSize()) {
183 TransferableEntryKind kind{};
184 if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
185 LOG_ERROR(Render_OpenGL, "Failed to read transferable file - skipping");
186 return {};
187 }
188
189 switch (kind) {
190 case TransferableEntryKind::Raw: {
191 ShaderDiskCacheRaw entry;
192 if (!entry.Load(file)) {
193 LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry - skipping");
194 return {};
195 }
196 transferable.insert({entry.GetUniqueIdentifier(), {}});
197 raws.push_back(std::move(entry));
198 break;
199 }
200 case TransferableEntryKind::Usage: {
201 ShaderDiskCacheUsage usage{};
202 if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) {
203 LOG_ERROR(Render_OpenGL, "Failed to load transferable usage entry - skipping");
204 return {};
205 }
206 usages.push_back(std::move(usage));
207 break;
208 }
209 default:
210 LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping",
211 static_cast<u32>(kind));
212 return {};
213 }
214 }
215 return {{raws, usages}};
216}
217
218std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
219 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
220ShaderDiskCacheOpenGL::LoadPrecompiled() {
221 if (!IsUsable())
222 return {};
223
224 FileUtil::IOFile file(GetPrecompiledPath(), "rb");
225 if (!file.IsOpen()) {
226 LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}",
227 GetTitleID());
228 return {};
229 }
230
231 const auto result = LoadPrecompiledFile(file);
232 if (!result) {
233 LOG_INFO(Render_OpenGL,
234 "Failed to load precompiled cache for game with title id={} - removing",
235 GetTitleID());
236 file.Close();
237 InvalidatePrecompiled();
238 return {};
239 }
240 return *result;
241}
242
243std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
244 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
245ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
246 ShaderCacheVersionHash file_hash{};
247 if (file.ReadArray(file_hash.data(), file_hash.size()) != file_hash.size()) {
248 return {};
249 }
250 if (GetShaderCacheVersionHash() != file_hash) {
251 LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
252 return {};
253 }
254
255 std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
256 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps;
257 while (file.Tell() < file.GetSize()) {
258 PrecompiledEntryKind kind{};
259 if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
260 return {};
261 }
262
263 switch (kind) {
264 case PrecompiledEntryKind::Decompiled: {
265 u64 unique_identifier{};
266 if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64))
267 return {};
268
269 const auto entry = LoadDecompiledEntry(file);
270 if (!entry)
271 return {};
272 decompiled.insert({unique_identifier, std::move(*entry)});
273 break;
274 }
275 case PrecompiledEntryKind::Dump: {
276 ShaderDiskCacheUsage usage;
277 if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage))
278 return {};
279
280 ShaderDiskCacheDump dump;
281 if (file.ReadBytes(&dump.binary_format, sizeof(u32)) != sizeof(u32))
282 return {};
283
284 u32 binary_length{};
285 u32 compressed_size{};
286 if (file.ReadBytes(&binary_length, sizeof(u32)) != sizeof(u32) ||
287 file.ReadBytes(&compressed_size, sizeof(u32)) != sizeof(u32)) {
288 return {};
289 }
290
291 std::vector<u8> compressed_binary(compressed_size);
292 if (file.ReadArray(compressed_binary.data(), compressed_binary.size()) !=
293 compressed_binary.size()) {
294 return {};
295 }
296
297 dump.binary = DecompressData(compressed_binary, binary_length);
298 if (dump.binary.empty()) {
299 return {};
300 }
301
302 dumps.insert({usage, dump});
303 break;
304 }
305 default:
306 return {};
307 }
308 }
309 return {{decompiled, dumps}};
310}
311
312std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry(
313 FileUtil::IOFile& file) {
314 u32 code_size{};
315 u32 compressed_code_size{};
316 if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
317 file.ReadBytes(&compressed_code_size, sizeof(u32)) != sizeof(u32)) {
318 return {};
319 }
320
321 std::vector<u8> compressed_code(compressed_code_size);
322 if (file.ReadArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
323 return {};
324 }
325
326 const std::vector<u8> code = DecompressData(compressed_code, code_size);
327 if (code.empty()) {
328 return {};
329 }
330 ShaderDiskCacheDecompiled entry;
331 entry.code = std::string(reinterpret_cast<const char*>(code.data()), code_size);
332
333 u32 const_buffers_count{};
334 if (file.ReadBytes(&const_buffers_count, sizeof(u32)) != sizeof(u32))
335 return {};
336 for (u32 i = 0; i < const_buffers_count; ++i) {
337 u32 max_offset{};
338 u32 index{};
339 u8 is_indirect{};
340 if (file.ReadBytes(&max_offset, sizeof(u32)) != sizeof(u32) ||
341 file.ReadBytes(&index, sizeof(u32)) != sizeof(u32) ||
342 file.ReadBytes(&is_indirect, sizeof(u8)) != sizeof(u8)) {
343 return {};
344 }
345 entry.entries.const_buffers.emplace_back(max_offset, is_indirect != 0, index);
346 }
347
348 u32 samplers_count{};
349 if (file.ReadBytes(&samplers_count, sizeof(u32)) != sizeof(u32))
350 return {};
351 for (u32 i = 0; i < samplers_count; ++i) {
352 u64 offset{};
353 u64 index{};
354 u32 type{};
355 u8 is_array{};
356 u8 is_shadow{};
357 if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) ||
358 file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) ||
359 file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) ||
360 file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) ||
361 file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8)) {
362 return {};
363 }
364 entry.entries.samplers.emplace_back(
365 static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
366 static_cast<Tegra::Shader::TextureType>(type), is_array != 0, is_shadow != 0);
367 }
368
369 u32 global_memory_count{};
370 if (file.ReadBytes(&global_memory_count, sizeof(u32)) != sizeof(u32))
371 return {};
372 for (u32 i = 0; i < global_memory_count; ++i) {
373 u32 cbuf_index{};
374 u32 cbuf_offset{};
375 if (file.ReadBytes(&cbuf_index, sizeof(u32)) != sizeof(u32) ||
376 file.ReadBytes(&cbuf_offset, sizeof(u32)) != sizeof(u32)) {
377 return {};
378 }
379 entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset);
380 }
381
382 for (auto& clip_distance : entry.entries.clip_distances) {
383 u8 clip_distance_raw{};
384 if (file.ReadBytes(&clip_distance_raw, sizeof(u8)) != sizeof(u8))
385 return {};
386 clip_distance = clip_distance_raw != 0;
387 }
388
389 u64 shader_length{};
390 if (file.ReadBytes(&shader_length, sizeof(u64)) != sizeof(u64))
391 return {};
392 entry.entries.shader_length = static_cast<std::size_t>(shader_length);
393
394 return entry;
395}
396
397bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier,
398 const std::string& code,
399 const std::vector<u8>& compressed_code,
400 const GLShader::ShaderEntries& entries) {
401 if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Decompiled)) != 1 ||
402 file.WriteObject(unique_identifier) != 1 ||
403 file.WriteObject(static_cast<u32>(code.size())) != 1 ||
404 file.WriteObject(static_cast<u32>(compressed_code.size())) != 1 ||
405 file.WriteArray(compressed_code.data(), compressed_code.size()) != compressed_code.size()) {
406 return false;
407 }
408
409 if (file.WriteObject(static_cast<u32>(entries.const_buffers.size())) != 1)
410 return false;
411 for (const auto& cbuf : entries.const_buffers) {
412 if (file.WriteObject(static_cast<u32>(cbuf.GetMaxOffset())) != 1 ||
413 file.WriteObject(static_cast<u32>(cbuf.GetIndex())) != 1 ||
414 file.WriteObject(static_cast<u8>(cbuf.IsIndirect() ? 1 : 0)) != 1) {
415 return false;
416 }
417 }
418
419 if (file.WriteObject(static_cast<u32>(entries.samplers.size())) != 1)
420 return false;
421 for (const auto& sampler : entries.samplers) {
422 if (file.WriteObject(static_cast<u64>(sampler.GetOffset())) != 1 ||
423 file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 ||
424 file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 ||
425 file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 ||
426 file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1) {
427 return false;
428 }
429 }
430
431 if (file.WriteObject(static_cast<u32>(entries.global_memory_entries.size())) != 1)
432 return false;
433 for (const auto& gmem : entries.global_memory_entries) {
434 if (file.WriteObject(static_cast<u32>(gmem.GetCbufIndex())) != 1 ||
435 file.WriteObject(static_cast<u32>(gmem.GetCbufOffset())) != 1) {
436 return false;
437 }
438 }
439
440 for (const bool clip_distance : entries.clip_distances) {
441 if (file.WriteObject(static_cast<u8>(clip_distance ? 1 : 0)) != 1)
442 return false;
443 }
444
445 return file.WriteObject(static_cast<u64>(entries.shader_length)) == 1;
446}
447
448void ShaderDiskCacheOpenGL::InvalidateTransferable() const {
449 if (!FileUtil::Delete(GetTransferablePath())) {
450 LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
451 GetTransferablePath());
452 }
453 InvalidatePrecompiled();
454}
455
456void ShaderDiskCacheOpenGL::InvalidatePrecompiled() const {
457 if (!FileUtil::Delete(GetPrecompiledPath())) {
458 LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath());
459 }
460}
461
462void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
463 if (!IsUsable())
464 return;
465
466 const u64 id = entry.GetUniqueIdentifier();
467 if (transferable.find(id) != transferable.end()) {
468 // The shader already exists
469 return;
470 }
471
472 FileUtil::IOFile file = AppendTransferableFile();
473 if (!file.IsOpen())
474 return;
475 if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) {
476 LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing");
477 file.Close();
478 InvalidateTransferable();
479 return;
480 }
481 transferable.insert({id, {}});
482}
483
484void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
485 if (!IsUsable())
486 return;
487
488 const auto it = transferable.find(usage.unique_identifier);
489 ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
490
491 auto& usages{it->second};
492 ASSERT(usages.find(usage) == usages.end());
493 usages.insert(usage);
494
495 FileUtil::IOFile file = AppendTransferableFile();
496 if (!file.IsOpen())
497 return;
498
499 if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage) != 1) {
500 LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry - removing");
501 file.Close();
502 InvalidateTransferable();
503 return;
504 }
505}
506
507void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code,
508 const GLShader::ShaderEntries& entries) {
509 if (!IsUsable())
510 return;
511
512 const std::vector<u8> compressed_code{CompressData(code.data(), code.size())};
513 if (compressed_code.empty()) {
514 LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}",
515 unique_identifier);
516 return;
517 }
518
519 FileUtil::IOFile file = AppendPrecompiledFile();
520 if (!file.IsOpen())
521 return;
522
523 if (!SaveDecompiledFile(file, unique_identifier, code, compressed_code, entries)) {
524 LOG_ERROR(Render_OpenGL,
525 "Failed to save decompiled entry to the precompiled file - removing");
526 file.Close();
527 InvalidatePrecompiled();
528 }
529}
530
531void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) {
532 if (!IsUsable())
533 return;
534
535 GLint binary_length{};
536 glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
537
538 GLenum binary_format{};
539 std::vector<u8> binary(binary_length);
540 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
541
542 const std::vector<u8> compressed_binary = CompressData(binary.data(), binary.size());
543 if (compressed_binary.empty()) {
544 LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}",
545 usage.unique_identifier);
546 return;
547 }
548
549 FileUtil::IOFile file = AppendPrecompiledFile();
550 if (!file.IsOpen())
551 return;
552
553 if (file.WriteObject(static_cast<u32>(PrecompiledEntryKind::Dump)) != 1 ||
554 file.WriteObject(usage) != 1 || file.WriteObject(static_cast<u32>(binary_format)) != 1 ||
555 file.WriteObject(static_cast<u32>(binary_length)) != 1 ||
556 file.WriteObject(static_cast<u32>(compressed_binary.size())) != 1 ||
557 file.WriteArray(compressed_binary.data(), compressed_binary.size()) !=
558 compressed_binary.size()) {
559 LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
560 usage.unique_identifier);
561 file.Close();
562 InvalidatePrecompiled();
563 return;
564 }
565}
566
567bool ShaderDiskCacheOpenGL::IsUsable() const {
568 return tried_to_load && Settings::values.use_disk_shader_cache;
569}
570
571FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
572 if (!EnsureDirectories())
573 return {};
574
575 const auto transferable_path{GetTransferablePath()};
576 const bool existed = FileUtil::Exists(transferable_path);
577
578 FileUtil::IOFile file(transferable_path, "ab");
579 if (!file.IsOpen()) {
580 LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", transferable_path);
581 return {};
582 }
583 if (!existed || file.GetSize() == 0) {
584 // If the file didn't exist, write its version
585 if (file.WriteObject(NativeVersion) != 1) {
586 LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
587 transferable_path);
588 return {};
589 }
590 }
591 return file;
592}
593
594FileUtil::IOFile ShaderDiskCacheOpenGL::AppendPrecompiledFile() const {
595 if (!EnsureDirectories())
596 return {};
597
598 const auto precompiled_path{GetPrecompiledPath()};
599 const bool existed = FileUtil::Exists(precompiled_path);
600
601 FileUtil::IOFile file(precompiled_path, "ab");
602 if (!file.IsOpen()) {
603 LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path);
604 return {};
605 }
606
607 if (!existed || file.GetSize() == 0) {
608 const auto hash{GetShaderCacheVersionHash()};
609 if (file.WriteArray(hash.data(), hash.size()) != hash.size()) {
610 LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version hash in path={}",
611 precompiled_path);
612 return {};
613 }
614 }
615 return file;
616}
617
618bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
619 const auto CreateDir = [](const std::string& dir) {
620 if (!FileUtil::CreateDir(dir)) {
621 LOG_ERROR(Render_OpenGL, "Failed to create directory={}", dir);
622 return false;
623 }
624 return true;
625 };
626
627 return CreateDir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) &&
628 CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) &&
629 CreateDir(GetPrecompiledDir());
630}
631
632std::string ShaderDiskCacheOpenGL::GetTransferablePath() const {
633 return FileUtil::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
634}
635
636std::string ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
637 return FileUtil::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin");
638}
639
640std::string ShaderDiskCacheOpenGL::GetTransferableDir() const {
641 return GetBaseDir() + DIR_SEP "transferable";
642}
643
644std::string ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
645 return GetBaseDir() + DIR_SEP "precompiled";
646}
647
648std::string ShaderDiskCacheOpenGL::GetBaseDir() const {
649 return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "opengl";
650}
651
652std::string ShaderDiskCacheOpenGL::GetTitleID() const {
653 return fmt::format("{:016X}", system.CurrentProcess()->GetTitleID());
654}
655
656} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
new file mode 100644
index 000000000..6be0c0547
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -0,0 +1,245 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8#include <string>
9#include <tuple>
10#include <unordered_map>
11#include <unordered_set>
12#include <utility>
13#include <vector>
14
15#include <glad/glad.h>
16
17#include "common/assert.h"
18#include "common/common_types.h"
19#include "video_core/engines/maxwell_3d.h"
20#include "video_core/renderer_opengl/gl_shader_gen.h"
21
22namespace Core {
23class System;
24}
25
26namespace FileUtil {
27class IOFile;
28}
29
30namespace OpenGL {
31
32using ProgramCode = std::vector<u64>;
33using Maxwell = Tegra::Engines::Maxwell3D::Regs;
34
35/// Allocated bindings used by an OpenGL shader program
36struct BaseBindings {
37 u32 cbuf{};
38 u32 gmem{};
39 u32 sampler{};
40
41 bool operator==(const BaseBindings& rhs) const {
42 return std::tie(cbuf, gmem, sampler) == std::tie(rhs.cbuf, rhs.gmem, rhs.sampler);
43 }
44
45 bool operator!=(const BaseBindings& rhs) const {
46 return !operator==(rhs);
47 }
48};
49
50/// Describes how a shader is used
51struct ShaderDiskCacheUsage {
52 u64 unique_identifier{};
53 BaseBindings bindings;
54 GLenum primitive{};
55
56 bool operator==(const ShaderDiskCacheUsage& rhs) const {
57 return std::tie(unique_identifier, bindings, primitive) ==
58 std::tie(rhs.unique_identifier, rhs.bindings, rhs.primitive);
59 }
60
61 bool operator!=(const ShaderDiskCacheUsage& rhs) const {
62 return !operator==(rhs);
63 }
64};
65
66} // namespace OpenGL
67
68namespace std {
69
70template <>
71struct hash<OpenGL::BaseBindings> {
72 std::size_t operator()(const OpenGL::BaseBindings& bindings) const {
73 return bindings.cbuf | bindings.gmem << 8 | bindings.sampler << 16;
74 }
75};
76
77template <>
78struct hash<OpenGL::ShaderDiskCacheUsage> {
79 std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const {
80 return static_cast<std::size_t>(usage.unique_identifier) ^
81 std::hash<OpenGL::BaseBindings>()(usage.bindings) ^ usage.primitive << 16;
82 }
83};
84
85} // namespace std
86
87namespace OpenGL {
88
89/// Describes a shader how it's used by the guest GPU
90class ShaderDiskCacheRaw {
91public:
92 explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type,
93 u32 program_code_size, u32 program_code_size_b,
94 ProgramCode program_code, ProgramCode program_code_b);
95 ShaderDiskCacheRaw();
96 ~ShaderDiskCacheRaw();
97
98 bool Load(FileUtil::IOFile& file);
99
100 bool Save(FileUtil::IOFile& file) const;
101
102 u64 GetUniqueIdentifier() const {
103 return unique_identifier;
104 }
105
106 bool HasProgramA() const {
107 return program_type == Maxwell::ShaderProgram::VertexA;
108 }
109
110 Maxwell::ShaderProgram GetProgramType() const {
111 return program_type;
112 }
113
114 Maxwell::ShaderStage GetProgramStage() const {
115 switch (program_type) {
116 case Maxwell::ShaderProgram::VertexA:
117 case Maxwell::ShaderProgram::VertexB:
118 return Maxwell::ShaderStage::Vertex;
119 case Maxwell::ShaderProgram::TesselationControl:
120 return Maxwell::ShaderStage::TesselationControl;
121 case Maxwell::ShaderProgram::TesselationEval:
122 return Maxwell::ShaderStage::TesselationEval;
123 case Maxwell::ShaderProgram::Geometry:
124 return Maxwell::ShaderStage::Geometry;
125 case Maxwell::ShaderProgram::Fragment:
126 return Maxwell::ShaderStage::Fragment;
127 }
128 UNREACHABLE();
129 }
130
131 const ProgramCode& GetProgramCode() const {
132 return program_code;
133 }
134
135 const ProgramCode& GetProgramCodeB() const {
136 return program_code_b;
137 }
138
139private:
140 u64 unique_identifier{};
141 Maxwell::ShaderProgram program_type{};
142 u32 program_code_size{};
143 u32 program_code_size_b{};
144
145 ProgramCode program_code;
146 ProgramCode program_code_b;
147};
148
149/// Contains decompiled data from a shader
150struct ShaderDiskCacheDecompiled {
151 std::string code;
152 GLShader::ShaderEntries entries;
153};
154
155/// Contains an OpenGL dumped binary program
156struct ShaderDiskCacheDump {
157 GLenum binary_format;
158 std::vector<u8> binary;
159};
160
161class ShaderDiskCacheOpenGL {
162public:
163 explicit ShaderDiskCacheOpenGL(Core::System& system);
164
165 /// Loads transferable cache. If file has a old version or on failure, it deletes the file.
166 std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
167 LoadTransferable();
168
169 /// Loads current game's precompiled cache. Invalidates on failure.
170 std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
171 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
172 LoadPrecompiled();
173
174 /// Removes the transferable (and precompiled) cache file.
175 void InvalidateTransferable() const;
176
177 /// Removes the precompiled cache file.
178 void InvalidatePrecompiled() const;
179
180 /// Saves a raw dump to the transferable file. Checks for collisions.
181 void SaveRaw(const ShaderDiskCacheRaw& entry);
182
183 /// Saves shader usage to the transferable file. Does not check for collisions.
184 void SaveUsage(const ShaderDiskCacheUsage& usage);
185
186 /// Saves a decompiled entry to the precompiled file. Does not check for collisions.
187 void SaveDecompiled(u64 unique_identifier, const std::string& code,
188 const GLShader::ShaderEntries& entries);
189
190 /// Saves a dump entry to the precompiled file. Does not check for collisions.
191 void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
192
193private:
194 /// Loads the transferable cache. Returns empty on failure.
195 std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
196 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
197 LoadPrecompiledFile(FileUtil::IOFile& file);
198
199 /// Loads a decompiled cache entry from the passed file. Returns empty on failure.
200 std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(FileUtil::IOFile& file);
201
202 /// Saves a decompiled entry to the passed file. Returns true on success.
203 bool SaveDecompiledFile(FileUtil::IOFile& file, u64 unique_identifier, const std::string& code,
204 const std::vector<u8>& compressed_code,
205 const GLShader::ShaderEntries& entries);
206
207 /// Returns if the cache can be used
208 bool IsUsable() const;
209
210 /// Opens current game's transferable file and write it's header if it doesn't exist
211 FileUtil::IOFile AppendTransferableFile() const;
212
213 /// Opens current game's precompiled file and write it's header if it doesn't exist
214 FileUtil::IOFile AppendPrecompiledFile() const;
215
216 /// Create shader disk cache directories. Returns true on success.
217 bool EnsureDirectories() const;
218
219 /// Gets current game's transferable file path
220 std::string GetTransferablePath() const;
221
222 /// Gets current game's precompiled file path
223 std::string GetPrecompiledPath() const;
224
225 /// Get user's transferable directory path
226 std::string GetTransferableDir() const;
227
228 /// Get user's precompiled directory path
229 std::string GetPrecompiledDir() const;
230
231 /// Get user's shader directory path
232 std::string GetBaseDir() const;
233
234 /// Get current game's title id
235 std::string GetTitleID() const;
236
237 // Copre system
238 Core::System& system;
239 // Stored transferable shaders
240 std::map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
241 // The cache has been loaded at boot
242 bool tried_to_load{};
243};
244
245} // namespace OpenGL \ No newline at end of file
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index ac5e6917b..fba8e681b 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -26,12 +26,10 @@ struct ShaderSetup {
26 ProgramCode code; 26 ProgramCode code;
27 ProgramCode code_b; // Used for dual vertex shaders 27 ProgramCode code_b; // Used for dual vertex shaders
28 u64 unique_identifier; 28 u64 unique_identifier;
29 std::size_t real_size;
30 std::size_t real_size_b;
31 } program; 29 } program;
32 30
33 /// Used in scenarios where we have dual vertex shaders 31 /// Used in scenarios where we have dual vertex shaders
34 void SetProgramB(ProgramCode&& program_b) { 32 void SetProgramB(ProgramCode program_b) {
35 program.code_b = std::move(program_b); 33 program.code_b = std::move(program_b);
36 has_program_b = true; 34 has_program_b = true;
37 } 35 }
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 285594f50..03b7548c2 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -47,7 +47,7 @@ GLuint LoadShader(const char* source, GLenum type);
47 * @returns Handle of the newly created OpenGL program object 47 * @returns Handle of the newly created OpenGL program object
48 */ 48 */
49template <typename... T> 49template <typename... T>
50GLuint LoadProgram(bool separable_program, T... shaders) { 50GLuint LoadProgram(bool separable_program, bool hint_retrievable, T... shaders) {
51 // Link the program 51 // Link the program
52 LOG_DEBUG(Render_OpenGL, "Linking program..."); 52 LOG_DEBUG(Render_OpenGL, "Linking program...");
53 53
@@ -58,6 +58,9 @@ GLuint LoadProgram(bool separable_program, T... shaders) {
58 if (separable_program) { 58 if (separable_program) {
59 glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); 59 glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE);
60 } 60 }
61 if (hint_retrievable) {
62 glProgramParameteri(program_id, GL_PROGRAM_BINARY_RETRIEVABLE_HINT, GL_TRUE);
63 }
61 64
62 glLinkProgram(program_id); 65 glLinkProgram(program_id);
63 66
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index b7ba59350..81af803bc 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -462,29 +462,35 @@ void OpenGLState::ApplyPolygonOffset() const {
462} 462}
463 463
464void OpenGLState::ApplyTextures() const { 464void OpenGLState::ApplyTextures() const {
465 bool has_delta{};
466 std::size_t first{};
467 std::size_t last{};
468 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures;
469
465 for (std::size_t i = 0; i < std::size(texture_units); ++i) { 470 for (std::size_t i = 0; i < std::size(texture_units); ++i) {
466 const auto& texture_unit = texture_units[i]; 471 const auto& texture_unit = texture_units[i];
467 const auto& cur_state_texture_unit = cur_state.texture_units[i]; 472 const auto& cur_state_texture_unit = cur_state.texture_units[i];
473 textures[i] = texture_unit.texture;
468 474
469 if (texture_unit.texture != cur_state_texture_unit.texture) { 475 if (textures[i] != cur_state_texture_unit.texture) {
470 glActiveTexture(TextureUnits::MaxwellTexture(static_cast<int>(i)).Enum()); 476 if (!has_delta) {
471 glBindTexture(texture_unit.target, texture_unit.texture); 477 first = i;
472 } 478 has_delta = true;
473 // Update the texture swizzle 479 }
474 if (texture_unit.swizzle.r != cur_state_texture_unit.swizzle.r || 480 last = i;
475 texture_unit.swizzle.g != cur_state_texture_unit.swizzle.g ||
476 texture_unit.swizzle.b != cur_state_texture_unit.swizzle.b ||
477 texture_unit.swizzle.a != cur_state_texture_unit.swizzle.a) {
478 std::array<GLint, 4> mask = {texture_unit.swizzle.r, texture_unit.swizzle.g,
479 texture_unit.swizzle.b, texture_unit.swizzle.a};
480 glTexParameteriv(texture_unit.target, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
481 } 481 }
482 } 482 }
483
484 if (has_delta) {
485 glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
486 textures.data() + first); // element 0 of the passed array goes to unit `first`, so skip ahead to the first changed unit
487 }
483} 488}
484 489
485void OpenGLState::ApplySamplers() const { 490void OpenGLState::ApplySamplers() const {
486 bool has_delta{}; 491 bool has_delta{};
487 std::size_t first{}, last{}; 492 std::size_t first{};
493 std::size_t last{};
488 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers; 494 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers;
489 for (std::size_t i = 0; i < std::size(samplers); ++i) { 495 for (std::size_t i = 0; i < std::size(samplers); ++i) {
490 samplers[i] = texture_units[i].sampler; 496 samplers[i] = texture_units[i].sampler;
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index a5a7c0920..9e1eda5b1 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -126,26 +126,14 @@ public:
126 struct TextureUnit { 126 struct TextureUnit {
127 GLuint texture; // GL_TEXTURE_BINDING_2D 127 GLuint texture; // GL_TEXTURE_BINDING_2D
128 GLuint sampler; // GL_SAMPLER_BINDING 128 GLuint sampler; // GL_SAMPLER_BINDING
129 GLenum target;
130 struct {
131 GLint r; // GL_TEXTURE_SWIZZLE_R
132 GLint g; // GL_TEXTURE_SWIZZLE_G
133 GLint b; // GL_TEXTURE_SWIZZLE_B
134 GLint a; // GL_TEXTURE_SWIZZLE_A
135 } swizzle;
136 129
137 void Unbind() { 130 void Unbind() {
138 texture = 0; 131 texture = 0;
139 swizzle.r = GL_RED;
140 swizzle.g = GL_GREEN;
141 swizzle.b = GL_BLUE;
142 swizzle.a = GL_ALPHA;
143 } 132 }
144 133
145 void Reset() { 134 void Reset() {
146 Unbind(); 135 Unbind();
147 sampler = 0; 136 sampler = 0;
148 target = GL_TEXTURE_2D;
149 } 137 }
150 }; 138 };
151 std::array<TextureUnit, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_units; 139 std::array<TextureUnit, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_units;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index e37b65b38..6476a9e1a 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -98,8 +98,8 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons
98 return matrix; 98 return matrix;
99} 99}
100 100
101RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window) 101RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system)
102 : VideoCore::RendererBase{window} {} 102 : VideoCore::RendererBase{window}, system{system} {}
103 103
104RendererOpenGL::~RendererOpenGL() = default; 104RendererOpenGL::~RendererOpenGL() = default;
105 105
@@ -171,10 +171,6 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
171 Memory::GetPointer(framebuffer_addr), 171 Memory::GetPointer(framebuffer_addr),
172 gl_framebuffer_data.data(), true); 172 gl_framebuffer_data.data(), true);
173 173
174 state.texture_units[0].texture = screen_info.texture.resource.handle;
175 state.Apply();
176
177 glActiveTexture(GL_TEXTURE0);
178 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); 174 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
179 175
180 // Update existing texture 176 // Update existing texture
@@ -182,14 +178,11 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
182 // they differ from the LCD resolution. 178 // they differ from the LCD resolution.
183 // TODO: Applications could theoretically crash yuzu here by specifying too large 179 // TODO: Applications could theoretically crash yuzu here by specifying too large
184 // framebuffer sizes. We should make sure that this cannot happen. 180 // framebuffer sizes. We should make sure that this cannot happen.
185 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, framebuffer.width, framebuffer.height, 181 glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
186 screen_info.texture.gl_format, screen_info.texture.gl_type, 182 framebuffer.height, screen_info.texture.gl_format,
187 gl_framebuffer_data.data()); 183 screen_info.texture.gl_type, gl_framebuffer_data.data());
188 184
189 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 185 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
190
191 state.texture_units[0].texture = 0;
192 state.Apply();
193 } 186 }
194} 187}
195 188
@@ -199,17 +192,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
199 */ 192 */
200void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, 193void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
201 const TextureInfo& texture) { 194 const TextureInfo& texture) {
202 state.texture_units[0].texture = texture.resource.handle; 195 const u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
203 state.Apply(); 196 glClearTexImage(texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
204
205 glActiveTexture(GL_TEXTURE0);
206 u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r};
207
208 // Update existing texture
209 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
210
211 state.texture_units[0].texture = 0;
212 state.Apply();
213} 197}
214 198
215/** 199/**
@@ -249,26 +233,13 @@ void RendererOpenGL::InitOpenGLObjects() {
249 sizeof(ScreenRectVertex)); 233 sizeof(ScreenRectVertex));
250 234
251 // Allocate textures for the screen 235 // Allocate textures for the screen
252 screen_info.texture.resource.Create(); 236 screen_info.texture.resource.Create(GL_TEXTURE_2D);
253 237
254 // Allocation of storage is deferred until the first frame, when we 238 const GLuint texture = screen_info.texture.resource.handle;
255 // know the framebuffer size. 239 glTextureStorage2D(texture, 1, GL_RGBA8, 1, 1);
256
257 state.texture_units[0].texture = screen_info.texture.resource.handle;
258 state.Apply();
259
260 glActiveTexture(GL_TEXTURE0);
261 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
262 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
263 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
264 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
265 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
266 240
267 screen_info.display_texture = screen_info.texture.resource.handle; 241 screen_info.display_texture = screen_info.texture.resource.handle;
268 242
269 state.texture_units[0].texture = 0;
270 state.Apply();
271
272 // Clear screen to black 243 // Clear screen to black
273 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); 244 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
274} 245}
@@ -279,25 +250,24 @@ void RendererOpenGL::CreateRasterizer() {
279 } 250 }
280 // Initialize sRGB Usage 251 // Initialize sRGB Usage
281 OpenGLState::ClearsRGBUsed(); 252 OpenGLState::ClearsRGBUsed();
282 rasterizer = std::make_unique<RasterizerOpenGL>(render_window, screen_info); 253 rasterizer = std::make_unique<RasterizerOpenGL>(render_window, system, screen_info);
283} 254}
284 255
285void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, 256void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
286 const Tegra::FramebufferConfig& framebuffer) { 257 const Tegra::FramebufferConfig& framebuffer) {
287
288 texture.width = framebuffer.width; 258 texture.width = framebuffer.width;
289 texture.height = framebuffer.height; 259 texture.height = framebuffer.height;
290 260
291 GLint internal_format; 261 GLint internal_format;
292 switch (framebuffer.pixel_format) { 262 switch (framebuffer.pixel_format) {
293 case Tegra::FramebufferConfig::PixelFormat::ABGR8: 263 case Tegra::FramebufferConfig::PixelFormat::ABGR8:
294 internal_format = GL_RGBA; 264 internal_format = GL_RGBA8;
295 texture.gl_format = GL_RGBA; 265 texture.gl_format = GL_RGBA;
296 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; 266 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
297 gl_framebuffer_data.resize(texture.width * texture.height * 4); 267 gl_framebuffer_data.resize(texture.width * texture.height * 4);
298 break; 268 break;
299 default: 269 default:
300 internal_format = GL_RGBA; 270 internal_format = GL_RGBA8;
301 texture.gl_format = GL_RGBA; 271 texture.gl_format = GL_RGBA;
302 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; 272 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
303 gl_framebuffer_data.resize(texture.width * texture.height * 4); 273 gl_framebuffer_data.resize(texture.width * texture.height * 4);
@@ -306,15 +276,9 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
306 UNREACHABLE(); 276 UNREACHABLE();
307 } 277 }
308 278
309 state.texture_units[0].texture = texture.resource.handle; 279 texture.resource.Release();
310 state.Apply(); 280 texture.resource.Create(GL_TEXTURE_2D);
311 281 glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
312 glActiveTexture(GL_TEXTURE0);
313 glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
314 texture.gl_format, texture.gl_type, nullptr);
315
316 state.texture_units[0].texture = 0;
317 state.Apply();
318} 282}
319 283
320void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, 284void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w,
@@ -356,7 +320,6 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
356 }}; 320 }};
357 321
358 state.texture_units[0].texture = screen_info.display_texture; 322 state.texture_units[0].texture = screen_info.display_texture;
359 state.texture_units[0].swizzle = {GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
360 // Workaround brightness problems in SMO by enabling sRGB in the final output 323 // Workaround brightness problems in SMO by enabling sRGB in the final output
361 // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987 324 // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
362 state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); 325 state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 1665018db..7e13e566b 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -12,6 +12,10 @@
12#include "video_core/renderer_opengl/gl_resource_manager.h" 12#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/gl_state.h" 13#include "video_core/renderer_opengl/gl_state.h"
14 14
15namespace Core {
16class System;
17}
18
15namespace Core::Frontend { 19namespace Core::Frontend {
16class EmuWindow; 20class EmuWindow;
17} 21}
@@ -41,7 +45,7 @@ struct ScreenInfo {
41 45
42class RendererOpenGL : public VideoCore::RendererBase { 46class RendererOpenGL : public VideoCore::RendererBase {
43public: 47public:
44 explicit RendererOpenGL(Core::Frontend::EmuWindow& window); 48 explicit RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system);
45 ~RendererOpenGL() override; 49 ~RendererOpenGL() override;
46 50
47 /// Swap buffers (render frame) 51 /// Swap buffers (render frame)
@@ -72,6 +76,8 @@ private:
72 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, 76 void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
73 const TextureInfo& texture); 77 const TextureInfo& texture);
74 78
79 Core::System& system;
80
75 OpenGLState state; 81 OpenGLState state;
76 82
77 // OpenGL object IDs 83 // OpenGL object IDs
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 9b579bde1..e006f8138 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -104,19 +104,42 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
104 } 104 }
105 case OpCode::Id::LD_L: { 105 case OpCode::Id::LD_L: {
106 UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}", 106 UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
107 static_cast<unsigned>(instr.ld_l.unknown.Value())); 107 static_cast<u32>(instr.ld_l.unknown.Value()));
108 108
109 const Node index = Operation(OperationCode::IAdd, GetRegister(instr.gpr8), 109 const auto GetLmem = [&](s32 offset) {
110 Immediate(static_cast<s32>(instr.smem_imm))); 110 ASSERT(offset % 4 == 0);
111 const Node lmem = GetLocalMemory(index); 111 const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
112 const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
113 immediate_offset);
114 return GetLocalMemory(address);
115 };
112 116
113 switch (instr.ldst_sl.type.Value()) { 117 switch (instr.ldst_sl.type.Value()) {
114 case Tegra::Shader::StoreType::Bytes32: 118 case Tegra::Shader::StoreType::Bits32:
115 SetRegister(bb, instr.gpr0, lmem); 119 case Tegra::Shader::StoreType::Bits64:
120 case Tegra::Shader::StoreType::Bits128: {
121 const u32 count = [&]() {
122 switch (instr.ldst_sl.type.Value()) {
123 case Tegra::Shader::StoreType::Bits32:
124 return 1;
125 case Tegra::Shader::StoreType::Bits64:
126 return 2;
127 case Tegra::Shader::StoreType::Bits128:
128 return 4;
129 default:
130 UNREACHABLE();
131 return 0;
132 }
133 }();
134 for (u32 i = 0; i < count; ++i)
135 SetTemporal(bb, i, GetLmem(i * 4));
136 for (u32 i = 0; i < count; ++i)
137 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
116 break; 138 break;
139 }
117 default: 140 default:
118 UNIMPLEMENTED_MSG("LD_L Unhandled type: {}", 141 UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
119 static_cast<unsigned>(instr.ldst_sl.type.Value())); 142 static_cast<u32>(instr.ldst_sl.type.Value()));
120 } 143 }
121 break; 144 break;
122 } 145 }
@@ -203,12 +226,20 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
203 UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}", 226 UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
204 static_cast<u32>(instr.st_l.unknown.Value())); 227 static_cast<u32>(instr.st_l.unknown.Value()));
205 228
206 const Node index = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), 229 const auto GetLmemAddr = [&](s32 offset) {
207 Immediate(static_cast<s32>(instr.smem_imm))); 230 ASSERT(offset % 4 == 0);
231 const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
232 return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
233 };
208 234
209 switch (instr.ldst_sl.type.Value()) { 235 switch (instr.ldst_sl.type.Value()) {
210 case Tegra::Shader::StoreType::Bytes32: 236 case Tegra::Shader::StoreType::Bits128:
211 SetLocalMemory(bb, index, GetRegister(instr.gpr0)); 237 SetLocalMemory(bb, GetLmemAddr(12), GetRegister(instr.gpr0.Value() + 3));
238 SetLocalMemory(bb, GetLmemAddr(8), GetRegister(instr.gpr0.Value() + 2));
239 case Tegra::Shader::StoreType::Bits64:
240 SetLocalMemory(bb, GetLmemAddr(4), GetRegister(instr.gpr0.Value() + 1));
241 case Tegra::Shader::StoreType::Bits32:
242 SetLocalMemory(bb, GetLmemAddr(0), GetRegister(instr.gpr0));
212 break; 243 break;
213 default: 244 default:
214 UNIMPLEMENTED_MSG("ST_L Unhandled type: {}", 245 UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
@@ -325,15 +356,18 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
325 const auto& sampler = 356 const auto& sampler =
326 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); 357 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
327 358
359 u32 indexer = 0;
328 switch (instr.txq.query_type) { 360 switch (instr.txq.query_type) {
329 case Tegra::Shader::TextureQueryType::Dimension: { 361 case Tegra::Shader::TextureQueryType::Dimension: {
330 for (u32 element = 0; element < 4; ++element) { 362 for (u32 element = 0; element < 4; ++element) {
331 MetaTexture meta{sampler, element}; 363 if (instr.txq.IsComponentEnabled(element)) {
332 const Node value = Operation(OperationCode::F4TextureQueryDimensions, 364 MetaTexture meta{sampler, element};
333 std::move(meta), GetRegister(instr.gpr8)); 365 const Node value = Operation(OperationCode::F4TextureQueryDimensions,
334 SetTemporal(bb, element, value); 366 std::move(meta), GetRegister(instr.gpr8));
367 SetTemporal(bb, indexer++, value);
368 }
335 } 369 }
336 for (u32 i = 0; i < 4; ++i) { 370 for (u32 i = 0; i < indexer; ++i) {
337 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 371 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
338 } 372 }
339 break; 373 break;
@@ -734,4 +768,4 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
734 return {coord_count, total_coord_count}; 768 return {coord_count, total_coord_count};
735} 769}
736 770
737} // namespace VideoCommon::Shader \ No newline at end of file 771} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 8f97512ee..1d4fbef53 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -236,6 +236,11 @@ private:
236 236
237class ConstBuffer { 237class ConstBuffer {
238public: 238public:
239 explicit ConstBuffer(u32 max_offset, bool is_indirect)
240 : max_offset{max_offset}, is_indirect{is_indirect} {}
241
242 ConstBuffer() = default;
243
239 void MarkAsUsed(u64 offset) { 244 void MarkAsUsed(u64 offset) {
240 max_offset = std::max(max_offset, static_cast<u32>(offset)); 245 max_offset = std::max(max_offset, static_cast<u32>(offset));
241 } 246 }
@@ -252,6 +257,10 @@ public:
252 return max_offset + sizeof(float); 257 return max_offset + sizeof(float);
253 } 258 }
254 259
260 u32 GetMaxOffset() const {
261 return max_offset;
262 }
263
255private: 264private:
256 u32 max_offset{}; 265 u32 max_offset{};
257 bool is_indirect{}; 266 bool is_indirect{};
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index e7c78bee2..0fc5530f2 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -182,7 +182,7 @@ struct TICEntry {
182 }; 182 };
183 union { 183 union {
184 BitField<0, 16, u32> height_minus_1; 184 BitField<0, 16, u32> height_minus_1;
185 BitField<16, 15, u32> depth_minus_1; 185 BitField<16, 14, u32> depth_minus_1;
186 }; 186 };
187 union { 187 union {
188 BitField<6, 13, u32> mip_lod_bias; 188 BitField<6, 13, u32> mip_lod_bias;
@@ -317,7 +317,6 @@ struct FullTextureInfo {
317 u32 index; 317 u32 index;
318 TICEntry tic; 318 TICEntry tic;
319 TSCEntry tsc; 319 TSCEntry tsc;
320 bool enabled;
321}; 320};
322 321
323/// Returns the number of bytes per pixel of the input texture format. 322/// Returns the number of bytes per pixel of the input texture format.
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 0b8ccdd44..cb82ecf3f 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -11,8 +11,9 @@
11 11
12namespace VideoCore { 12namespace VideoCore {
13 13
14std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window) { 14std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
15 return std::make_unique<OpenGL::RendererOpenGL>(emu_window); 15 Core::System& system) {
16 return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system);
16} 17}
17 18
18u16 GetResolutionScaleFactor(const RendererBase& renderer) { 19u16 GetResolutionScaleFactor(const RendererBase& renderer) {
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index 5b373bcb1..3c583f195 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -6,6 +6,10 @@
6 6
7#include <memory> 7#include <memory>
8 8
9namespace Core {
10class System;
11}
12
9namespace Core::Frontend { 13namespace Core::Frontend {
10class EmuWindow; 14class EmuWindow;
11} 15}
@@ -20,7 +24,8 @@ class RendererBase;
20 * @note The returned renderer instance is simply allocated. Its Init() 24 * @note The returned renderer instance is simply allocated. Its Init()
21 * function still needs to be called to fully complete its setup. 25 * function still needs to be called to fully complete its setup.
22 */ 26 */
23std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window); 27std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
28 Core::System& system);
24 29
25u16 GetResolutionScaleFactor(const RendererBase& renderer); 30u16 GetResolutionScaleFactor(const RendererBase& renderer);
26 31
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index f74cb693a..73b04b749 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -29,6 +29,15 @@ void EmuThread::run() {
29 29
30 stop_run = false; 30 stop_run = false;
31 31
32 emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0);
33
34 Core::System::GetInstance().Renderer().Rasterizer().LoadDiskResources(
35 stop_run, [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) {
36 emit LoadProgress(stage, value, total);
37 });
38
39 emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0);
40
32 // holds whether the cpu was running during the last iteration, 41 // holds whether the cpu was running during the last iteration,
33 // so that the DebugModeLeft signal can be emitted before the 42 // so that the DebugModeLeft signal can be emitted before the
34 // next execution step 43 // next execution step
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index d1f37e503..7226e690e 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -22,6 +22,10 @@ class GGLWidgetInternal;
22class GMainWindow; 22class GMainWindow;
23class GRenderWindow; 23class GRenderWindow;
24 24
25namespace VideoCore {
26enum class LoadCallbackStage;
27}
28
25class EmuThread : public QThread { 29class EmuThread : public QThread {
26 Q_OBJECT 30 Q_OBJECT
27 31
@@ -75,7 +79,7 @@ public:
75private: 79private:
76 bool exec_step = false; 80 bool exec_step = false;
77 bool running = false; 81 bool running = false;
78 std::atomic<bool> stop_run{false}; 82 std::atomic_bool stop_run{false};
79 std::mutex running_mutex; 83 std::mutex running_mutex;
80 std::condition_variable running_cv; 84 std::condition_variable running_cv;
81 85
@@ -101,6 +105,8 @@ signals:
101 void DebugModeLeft(); 105 void DebugModeLeft();
102 106
103 void ErrorThrown(Core::System::ResultStatus, std::string); 107 void ErrorThrown(Core::System::ResultStatus, std::string);
108
109 void LoadProgress(VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total);
104}; 110};
105 111
106class GRenderWindow : public QWidget, public Core::Frontend::EmuWindow { 112class GRenderWindow : public QWidget, public Core::Frontend::EmuWindow {
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index ddf4cf552..e9546dadf 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -370,6 +370,8 @@ void Config::ReadValues() {
370 Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat(); 370 Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat();
371 Settings::values.use_frame_limit = qt_config->value("use_frame_limit", true).toBool(); 371 Settings::values.use_frame_limit = qt_config->value("use_frame_limit", true).toBool();
372 Settings::values.frame_limit = qt_config->value("frame_limit", 100).toInt(); 372 Settings::values.frame_limit = qt_config->value("frame_limit", 100).toInt();
373 Settings::values.use_disk_shader_cache =
374 qt_config->value("use_disk_shader_cache", false).toBool();
373 Settings::values.use_accurate_gpu_emulation = 375 Settings::values.use_accurate_gpu_emulation =
374 qt_config->value("use_accurate_gpu_emulation", false).toBool(); 376 qt_config->value("use_accurate_gpu_emulation", false).toBool();
375 377
@@ -629,6 +631,7 @@ void Config::SaveValues() {
629 qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor); 631 qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor);
630 qt_config->setValue("use_frame_limit", Settings::values.use_frame_limit); 632 qt_config->setValue("use_frame_limit", Settings::values.use_frame_limit);
631 qt_config->setValue("frame_limit", Settings::values.frame_limit); 633 qt_config->setValue("frame_limit", Settings::values.frame_limit);
634 qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache);
632 qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation); 635 qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation);
633 636
634 // Cast to double because Qt's written float values are not human-readable 637 // Cast to double because Qt's written float values are not human-readable
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index 8290b4384..0f5dd534b 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -62,9 +62,7 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent)
62 const QColor new_bg_color = QColorDialog::getColor(bg_color); 62 const QColor new_bg_color = QColorDialog::getColor(bg_color);
63 if (!new_bg_color.isValid()) 63 if (!new_bg_color.isValid())
64 return; 64 return;
65 bg_color = new_bg_color; 65 UpdateBackgroundColorButton(new_bg_color);
66 ui->bg_button->setStyleSheet(
67 QString("QPushButton { background-color: %1 }").arg(bg_color.name()));
68 }); 66 });
69} 67}
70 68
@@ -75,11 +73,10 @@ void ConfigureGraphics::setConfiguration() {
75 static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); 73 static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
76 ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit); 74 ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit);
77 ui->frame_limit->setValue(Settings::values.frame_limit); 75 ui->frame_limit->setValue(Settings::values.frame_limit);
76 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
78 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); 77 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
79 bg_color = QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, 78 UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green,
80 Settings::values.bg_blue); 79 Settings::values.bg_blue));
81 ui->bg_button->setStyleSheet(
82 QString("QPushButton { background-color: %1 }").arg(bg_color.name()));
83} 80}
84 81
85void ConfigureGraphics::applyConfiguration() { 82void ConfigureGraphics::applyConfiguration() {
@@ -87,8 +84,19 @@ void ConfigureGraphics::applyConfiguration() {
87 ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); 84 ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
88 Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked(); 85 Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked();
89 Settings::values.frame_limit = ui->frame_limit->value(); 86 Settings::values.frame_limit = ui->frame_limit->value();
87 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
90 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); 88 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
91 Settings::values.bg_red = static_cast<float>(bg_color.redF()); 89 Settings::values.bg_red = static_cast<float>(bg_color.redF());
92 Settings::values.bg_green = static_cast<float>(bg_color.greenF()); 90 Settings::values.bg_green = static_cast<float>(bg_color.greenF());
93 Settings::values.bg_blue = static_cast<float>(bg_color.blueF()); 91 Settings::values.bg_blue = static_cast<float>(bg_color.blueF());
94} 92}
93
94void ConfigureGraphics::UpdateBackgroundColorButton(QColor color) {
95 bg_color = color;
96
97 QPixmap pixmap(ui->bg_button->size());
98 pixmap.fill(bg_color);
99
100 const QIcon color_icon(pixmap);
101 ui->bg_button->setIcon(color_icon);
102}
diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h
index d6ffc6fde..f2799822d 100644
--- a/src/yuzu/configuration/configure_graphics.h
+++ b/src/yuzu/configuration/configure_graphics.h
@@ -23,6 +23,8 @@ public:
23private: 23private:
24 void setConfiguration(); 24 void setConfiguration();
25 25
26 void UpdateBackgroundColorButton(QColor color);
27
26 std::unique_ptr<Ui::ConfigureGraphics> ui; 28 std::unique_ptr<Ui::ConfigureGraphics> ui;
27 QColor bg_color; 29 QColor bg_color;
28}; 30};
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index e278cdd05..824f5810a 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -50,6 +50,13 @@
50 </layout> 50 </layout>
51 </item> 51 </item>
52 <item> 52 <item>
53 <widget class="QCheckBox" name="use_disk_shader_cache">
54 <property name="text">
55 <string>Use disk shader cache</string>
56 </property>
57 </widget>
58 </item>
59 <item>
53 <widget class="QCheckBox" name="use_accurate_gpu_emulation"> 60 <widget class="QCheckBox" name="use_accurate_gpu_emulation">
54 <property name="text"> 61 <property name="text">
55 <string>Use accurate GPU emulation (slow)</string> 62 <string>Use accurate GPU emulation (slow)</string>
diff --git a/src/yuzu/loading_screen.cpp b/src/yuzu/loading_screen.cpp
index 907aac4f1..86f6d0165 100644
--- a/src/yuzu/loading_screen.cpp
+++ b/src/yuzu/loading_screen.cpp
@@ -43,6 +43,7 @@ QProgressBar {
43} 43}
44QProgressBar::chunk { 44QProgressBar::chunk {
45 background-color: #0ab9e6; 45 background-color: #0ab9e6;
46 width: 1px;
46})"; 47})";
47 48
48constexpr const char PROGRESSBAR_STYLE_BUILD[] = R"( 49constexpr const char PROGRESSBAR_STYLE_BUILD[] = R"(
@@ -53,7 +54,8 @@ QProgressBar {
53 padding: 2px; 54 padding: 2px;
54} 55}
55QProgressBar::chunk { 56QProgressBar::chunk {
56 background-color: #ff3c28; 57 background-color: #ff3c28;
58 width: 1px;
57})"; 59})";
58 60
59constexpr const char PROGRESSBAR_STYLE_COMPLETE[] = R"( 61constexpr const char PROGRESSBAR_STYLE_COMPLETE[] = R"(
diff --git a/src/yuzu/loading_screen.ui b/src/yuzu/loading_screen.ui
index a67d273fd..820b47536 100644
--- a/src/yuzu/loading_screen.ui
+++ b/src/yuzu/loading_screen.ui
@@ -132,7 +132,7 @@ border-radius: 15px;
132font: 75 15pt &quot;Arial&quot;;</string> 132font: 75 15pt &quot;Arial&quot;;</string>
133 </property> 133 </property>
134 <property name="text"> 134 <property name="text">
135 <string>Stage 1 of 2. Estimate Time 5m 4s</string> 135 <string>Estimated Time 5m 4s</string>
136 </property> 136 </property>
137 </widget> 137 </widget>
138 </item> 138 </item>
@@ -146,6 +146,9 @@ font: 75 15pt &quot;Arial&quot;;</string>
146 <property name="text"> 146 <property name="text">
147 <string/> 147 <string/>
148 </property> 148 </property>
149 <property name="alignment">
150 <set>Qt::AlignCenter</set>
151 </property>
149 <property name="margin"> 152 <property name="margin">
150 <number>30</number> 153 <number>30</number>
151 </property> 154 </property>
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index ab403b3ac..1d460c189 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -887,6 +887,9 @@ void GMainWindow::BootGame(const QString& filename) {
887 connect(emu_thread.get(), &EmuThread::DebugModeLeft, waitTreeWidget, 887 connect(emu_thread.get(), &EmuThread::DebugModeLeft, waitTreeWidget,
888 &WaitTreeWidget::OnDebugModeLeft, Qt::BlockingQueuedConnection); 888 &WaitTreeWidget::OnDebugModeLeft, Qt::BlockingQueuedConnection);
889 889
890 connect(emu_thread.get(), &EmuThread::LoadProgress, loading_screen,
891 &LoadingScreen::OnLoadProgress, Qt::QueuedConnection);
892
890 // Update the GUI 893 // Update the GUI
891 if (ui.action_Single_Window_Mode->isChecked()) { 894 if (ui.action_Single_Window_Mode->isChecked()) {
892 game_list->hide(); 895 game_list->hide();
@@ -1682,12 +1685,16 @@ void GMainWindow::OnToggleFilterBar() {
1682 1685
1683void GMainWindow::OnCaptureScreenshot() { 1686void GMainWindow::OnCaptureScreenshot() {
1684 OnPauseGame(); 1687 OnPauseGame();
1685 const QString path = 1688 QFileDialog png_dialog(this, tr("Capture Screenshot"), UISettings::values.screenshot_path,
1686 QFileDialog::getSaveFileName(this, tr("Capture Screenshot"), 1689 tr("PNG Image (*.png)"));
1687 UISettings::values.screenshot_path, tr("PNG Image (*.png)")); 1690 png_dialog.setAcceptMode(QFileDialog::AcceptSave);
1688 if (!path.isEmpty()) { 1691 png_dialog.setDefaultSuffix("png");
1689 UISettings::values.screenshot_path = QFileInfo(path).path(); 1692 if (png_dialog.exec()) {
1690 render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor, path); 1693 const QString path = png_dialog.selectedFiles().first();
1694 if (!path.isEmpty()) {
1695 UISettings::values.screenshot_path = QFileInfo(path).path();
1696 render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor, path);
1697 }
1691 } 1698 }
1692 OnStartGame(); 1699 OnStartGame();
1693} 1700}
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 7a77f76e8..ff05b3179 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -350,6 +350,8 @@ void Config::ReadValues() {
350 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); 350 Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true);
351 Settings::values.frame_limit = 351 Settings::values.frame_limit =
352 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); 352 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
353 Settings::values.use_disk_shader_cache =
354 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
353 Settings::values.use_accurate_gpu_emulation = 355 Settings::values.use_accurate_gpu_emulation =
354 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); 356 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
355 357
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index ba51a4a51..a81986f8e 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -110,6 +110,10 @@ use_frame_limit =
110# 1 - 9999: Speed limit as a percentage of target game speed. 100 (default) 110# 1 - 9999: Speed limit as a percentage of target game speed. 100 (default)
111frame_limit = 111frame_limit =
112 112
113# Whether to use disk based shader cache
114# 0 (default): Off, 1 : On
115use_disk_shader_cache =
116
113# Whether to use accurate GPU emulation 117# Whether to use accurate GPU emulation
114# 0 (default): Off (fast), 1 : On (slow) 118# 0 (default): Off (fast), 1 : On (slow)
115use_accurate_gpu_emulation = 119use_accurate_gpu_emulation =
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp
index 806127b12..c34b5467f 100644
--- a/src/yuzu_cmd/yuzu.cpp
+++ b/src/yuzu_cmd/yuzu.cpp
@@ -28,6 +28,7 @@
28#include "core/loader/loader.h" 28#include "core/loader/loader.h"
29#include "core/settings.h" 29#include "core/settings.h"
30#include "core/telemetry_session.h" 30#include "core/telemetry_session.h"
31#include "video_core/renderer_base.h"
31#include "yuzu_cmd/config.h" 32#include "yuzu_cmd/config.h"
32#include "yuzu_cmd/emu_window/emu_window_sdl2.h" 33#include "yuzu_cmd/emu_window/emu_window_sdl2.h"
33 34
@@ -217,6 +218,8 @@ int main(int argc, char** argv) {
217 218
218 Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); 219 Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "SDL");
219 220
221 system.Renderer().Rasterizer().LoadDiskResources();
222
220 while (emu_window->IsOpen()) { 223 while (emu_window->IsOpen()) {
221 system.RunLoop(); 224 system.RunLoop();
222 } 225 }