summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitmodules3
-rw-r--r--CMakeLists.txt14
-rw-r--r--CMakeModules/GenerateSCMRev.cmake2
-rw-r--r--dist/qt_themes/qdarkstyle/style.qss4
-rw-r--r--dist/yuzu.manifest80
-rw-r--r--externals/CMakeLists.txt26
m---------externals/libressl0
m---------externals/sirit0
-rw-r--r--src/audio_core/audio_renderer.cpp19
-rw-r--r--src/audio_core/audio_renderer.h13
-rw-r--r--src/audio_core/stream.cpp2
-rw-r--r--src/common/CMakeLists.txt13
-rw-r--r--src/common/fiber.cpp226
-rw-r--r--src/common/fiber.h92
-rw-r--r--src/common/spin_lock.cpp54
-rw-r--r--src/common/spin_lock.h21
-rw-r--r--src/common/telemetry.cpp1
-rw-r--r--src/common/thread.h4
-rw-r--r--src/common/uint128.cpp26
-rw-r--r--src/common/uint128.h3
-rw-r--r--src/common/wall_clock.cpp92
-rw-r--r--src/common/wall_clock.h51
-rw-r--r--src/common/x64/cpu_detect.cpp38
-rw-r--r--src/common/x64/cpu_detect.h13
-rw-r--r--src/common/x64/native_clock.cpp95
-rw-r--r--src/common/x64/native_clock.h41
-rw-r--r--src/common/x64/xbyak_abi.h95
-rw-r--r--src/core/CMakeLists.txt6
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_32.cpp23
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_32.h5
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_64.cpp4
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_cp15.cpp81
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic_cp15.h126
-rw-r--r--src/core/core_timing_util.cpp15
-rw-r--r--src/core/core_timing_util.h3
-rw-r--r--src/core/file_sys/system_archive/mii_model.cpp2
-rw-r--r--src/core/file_sys/system_archive/shared_font.cpp2
-rw-r--r--src/core/hle/kernel/hle_ipc.cpp47
-rw-r--r--src/core/hle/kernel/memory/memory_manager.cpp5
-rw-r--r--src/core/hle/kernel/process.cpp6
-rw-r--r--src/core/hle/kernel/readable_event.cpp2
-rw-r--r--src/core/hle/kernel/resource_limit.cpp6
-rw-r--r--src/core/hle/service/acc/acc.cpp341
-rw-r--r--src/core/hle/service/acc/acc_aa.cpp4
-rw-r--r--src/core/hle/service/acc/acc_su.cpp34
-rw-r--r--src/core/hle/service/acc/acc_u0.cpp18
-rw-r--r--src/core/hle/service/acc/acc_u1.cpp29
-rw-r--r--src/core/hle/service/am/am.cpp18
-rw-r--r--src/core/hle/service/am/am.h2
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.cpp6
-rw-r--r--src/core/hle/service/am/spsm.cpp16
-rw-r--r--src/core/hle/service/aoc/aoc_u.cpp1
-rw-r--r--src/core/hle/service/bcat/bcat.cpp2
-rw-r--r--src/core/hle/service/bcat/module.cpp3
-rw-r--r--src/core/hle/service/bpc/bpc.cpp20
-rw-r--r--src/core/hle/service/btdrv/btdrv.cpp167
-rw-r--r--src/core/hle/service/btm/btm.cpp147
-rw-r--r--src/core/hle/service/caps/caps.cpp2
-rw-r--r--src/core/hle/service/caps/caps.h76
-rw-r--r--src/core/hle/service/caps/caps_a.cpp2
-rw-r--r--src/core/hle/service/caps/caps_a.h2
-rw-r--r--src/core/hle/service/caps/caps_c.cpp2
-rw-r--r--src/core/hle/service/caps/caps_c.h2
-rw-r--r--src/core/hle/service/caps/caps_sc.cpp2
-rw-r--r--src/core/hle/service/caps/caps_sc.h2
-rw-r--r--src/core/hle/service/caps/caps_ss.cpp2
-rw-r--r--src/core/hle/service/caps/caps_ss.h2
-rw-r--r--src/core/hle/service/caps/caps_su.cpp2
-rw-r--r--src/core/hle/service/caps/caps_su.h2
-rw-r--r--src/core/hle/service/caps/caps_u.cpp26
-rw-r--r--src/core/hle/service/caps/caps_u.h2
-rw-r--r--src/core/hle/service/es/es.cpp47
-rw-r--r--src/core/hle/service/eupld/eupld.cpp1
-rw-r--r--src/core/hle/service/friend/friend.cpp6
-rw-r--r--src/core/hle/service/grc/grc.cpp3
-rw-r--r--src/core/hle/service/hid/controllers/npad.cpp8
-rw-r--r--src/core/hle/service/hid/controllers/npad.h10
-rw-r--r--src/core/hle/service/hid/hid.cpp148
-rw-r--r--src/core/hle/service/hid/hid.h16
-rw-r--r--src/core/hle/service/lbl/lbl.cpp1
-rw-r--r--src/core/hle/service/ldn/ldn.cpp1
-rw-r--r--src/core/hle/service/lm/manager.cpp3
-rw-r--r--src/core/hle/service/mig/mig.cpp6
-rw-r--r--src/core/hle/service/mm/mm_u.cpp32
-rw-r--r--src/core/hle/service/ncm/ncm.cpp20
-rw-r--r--src/core/hle/service/nfc/nfc.cpp6
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp25
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h18
-rw-r--r--src/core/host_timing.cpp206
-rw-r--r--src/core/host_timing.h160
-rw-r--r--src/core/settings.cpp7
-rw-r--r--src/core/settings.h6
-rw-r--r--src/input_common/keyboard.cpp2
-rw-r--r--src/input_common/motion_emu.cpp2
-rw-r--r--src/tests/CMakeLists.txt2
-rw-r--r--src/tests/common/fibers.cpp358
-rw-r--r--src/tests/core/host_timing.cpp142
-rw-r--r--src/video_core/CMakeLists.txt15
-rw-r--r--src/video_core/buffer_cache/buffer_block.h27
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h272
-rw-r--r--src/video_core/engines/const_buffer_engine_interface.h1
-rw-r--r--src/video_core/engines/kepler_compute.cpp5
-rw-r--r--src/video_core/engines/kepler_compute.h2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp26
-rw-r--r--src/video_core/engines/maxwell_3d.h27
-rw-r--r--src/video_core/engines/shader_bytecode.h8
-rw-r--r--src/video_core/macro/macro.cpp72
-rw-r--r--src/video_core/macro/macro.h141
-rw-r--r--src/video_core/macro/macro_hle.cpp113
-rw-r--r--src/video_core/macro/macro_hle.h44
-rw-r--r--src/video_core/macro/macro_interpreter.cpp (renamed from src/video_core/macro_interpreter.cpp)199
-rw-r--r--src/video_core/macro/macro_interpreter.h (renamed from src/video_core/macro_interpreter.h)51
-rw-r--r--src/video_core/macro/macro_jit_x64.cpp621
-rw-r--r--src/video_core/macro/macro_jit_x64.h98
-rw-r--r--src/video_core/memory_manager.cpp40
-rw-r--r--src/video_core/memory_manager.h12
-rw-r--r--src/video_core/query_cache.h10
-rw-r--r--src/video_core/rasterizer_cache.cpp7
-rw-r--r--src/video_core/rasterizer_cache.h253
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.cpp2073
-rw-r--r--src/video_core/renderer_opengl/gl_arb_decompiler.h29
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp71
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h49
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp136
-rw-r--r--src/video_core/renderer_opengl/gl_device.h27
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp297
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h25
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp99
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h52
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp163
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp64
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h1
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp64
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.h25
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp56
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h6
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h82
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp20
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h3
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp3
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp34
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp97
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h42
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_descriptor_pool.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp143
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp92
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h33
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp183
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h23
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp167
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h12
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp80
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h33
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.cpp36
-rw-r--r--src/video_core/renderer_vulkan/vk_update_descriptor.h32
-rw-r--r--src/video_core/renderer_vulkan/wrapper.cpp3
-rw-r--r--src/video_core/renderer_vulkan/wrapper.h2
-rw-r--r--src/video_core/shader/decode/half_set.cpp88
-rw-r--r--src/video_core/shader/decode/image.cpp26
-rw-r--r--src/video_core/shader/decode/other.cpp2
-rw-r--r--src/video_core/shader/decode/texture.cpp55
-rw-r--r--src/video_core/shader/memory_util.cpp4
-rw-r--r--src/video_core/shader/node.h75
-rw-r--r--src/video_core/shader/node_helper.h2
-rw-r--r--src/video_core/shader/registry.cpp20
-rw-r--r--src/video_core/shader/registry.h35
-rw-r--r--src/video_core/shader/shader_ir.h14
-rw-r--r--src/video_core/shader/track.cpp78
-rw-r--r--src/video_core/shader_cache.h228
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp3
-rw-r--r--src/video_core/texture_cache/surface_base.cpp10
-rw-r--r--src/video_core/texture_cache/surface_base.h13
-rw-r--r--src/video_core/texture_cache/surface_params.cpp19
-rw-r--r--src/video_core/texture_cache/texture_cache.h245
-rw-r--r--src/yuzu/CMakeLists.txt4
-rw-r--r--src/yuzu/bootmanager.cpp22
-rw-r--r--src/yuzu/configuration/config.cpp16
-rw-r--r--src/yuzu/configuration/config.h2
-rw-r--r--src/yuzu/configuration/configure_debug.cpp3
-rw-r--r--src/yuzu/configuration/configure_debug.ui13
-rw-r--r--src/yuzu/configuration/configure_graphics.cpp45
-rw-r--r--src/yuzu/configuration/configure_graphics.ui40
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.cpp3
-rw-r--r--src/yuzu/configuration/configure_input_player.cpp3
-rw-r--r--src/yuzu/configuration/configure_service.cpp6
-rw-r--r--src/yuzu/main.cpp35
-rw-r--r--src/yuzu/main.h1
-rw-r--r--src/yuzu/main.ui6
-rw-r--r--src/yuzu/yuzu.rc2
-rw-r--r--src/yuzu_cmd/config.cpp4
-rw-r--r--src/yuzu_cmd/default_ini.h7
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp3
-rw-r--r--src/yuzu_cmd/yuzu.rc2
-rw-r--r--src/yuzu_tester/config.cpp2
-rw-r--r--src/yuzu_tester/default_ini.h5
-rw-r--r--src/yuzu_tester/service/yuzutest.cpp2
-rw-r--r--src/yuzu_tester/yuzu.rc2
202 files changed, 8544 insertions, 2559 deletions
diff --git a/.gitmodules b/.gitmodules
index 2ec9dda62..9ba8fe207 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -13,6 +13,9 @@
13[submodule "soundtouch"] 13[submodule "soundtouch"]
14 path = externals/soundtouch 14 path = externals/soundtouch
15 url = https://github.com/citra-emu/ext-soundtouch.git 15 url = https://github.com/citra-emu/ext-soundtouch.git
16[submodule "libressl"]
17 path = externals/libressl
18 url = https://github.com/citra-emu/ext-libressl-portable.git
16[submodule "discord-rpc"] 19[submodule "discord-rpc"]
17 path = externals/discord-rpc 20 path = externals/discord-rpc
18 url = https://github.com/discordapp/discord-rpc.git 21 url = https://github.com/discordapp/discord-rpc.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 61321bf0a..b71071271 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
1cmake_minimum_required(VERSION 3.11) 1cmake_minimum_required(VERSION 3.15)
2 2
3list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules") 3list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
4list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules") 4list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules")
@@ -13,7 +13,7 @@ project(yuzu)
13option(ENABLE_SDL2 "Enable the SDL2 frontend" ON) 13option(ENABLE_SDL2 "Enable the SDL2 frontend" ON)
14 14
15option(ENABLE_QT "Enable the Qt frontend" ON) 15option(ENABLE_QT "Enable the Qt frontend" ON)
16CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" OFF "ENABLE_QT;MSVC" OFF) 16CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" ON "ENABLE_QT;MSVC" OFF)
17 17
18option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON) 18option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON)
19 19
@@ -152,7 +152,6 @@ macro(yuzu_find_packages)
152 "Boost 1.71 boost/1.72.0" 152 "Boost 1.71 boost/1.72.0"
153 "Catch2 2.11 catch2/2.11.0" 153 "Catch2 2.11 catch2/2.11.0"
154 "fmt 6.2 fmt/6.2.0" 154 "fmt 6.2 fmt/6.2.0"
155 "OpenSSL 1.1 openssl/1.1.1f"
156 # can't use until https://github.com/bincrafters/community/issues/1173 155 # can't use until https://github.com/bincrafters/community/issues/1173
157 #"libzip 1.5 libzip/1.5.2@bincrafters/stable" 156 #"libzip 1.5 libzip/1.5.2@bincrafters/stable"
158 "lz4 1.8 lz4/1.9.2" 157 "lz4 1.8 lz4/1.9.2"
@@ -312,15 +311,6 @@ elseif (TARGET Boost::boost)
312 add_library(boost ALIAS Boost::boost) 311 add_library(boost ALIAS Boost::boost)
313endif() 312endif()
314 313
315if (NOT TARGET OpenSSL::SSL)
316 set_target_properties(OpenSSL::OpenSSL PROPERTIES IMPORTED_GLOBAL TRUE)
317 add_library(OpenSSL::SSL ALIAS OpenSSL::OpenSSL)
318endif()
319if (NOT TARGET OpenSSL::Crypto)
320 set_target_properties(OpenSSL::OpenSSL PROPERTIES IMPORTED_GLOBAL TRUE)
321 add_library(OpenSSL::Crypto ALIAS OpenSSL::OpenSSL)
322endif()
323
324if (TARGET sdl2::sdl2) 314if (TARGET sdl2::sdl2)
325 # imported from the conan generated sdl2Config.cmake 315 # imported from the conan generated sdl2Config.cmake
326 set_target_properties(sdl2::sdl2 PROPERTIES IMPORTED_GLOBAL TRUE) 316 set_target_properties(sdl2::sdl2 PROPERTIES IMPORTED_GLOBAL TRUE)
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index 83e4e9df2..311ba1c2e 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -51,6 +51,8 @@ endif()
51# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR) 51# The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
52set(VIDEO_CORE "${SRC_DIR}/src/video_core") 52set(VIDEO_CORE "${SRC_DIR}/src/video_core")
53set(HASH_FILES 53set(HASH_FILES
54 "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
55 "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
54 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" 56 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
55 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" 57 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
56 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" 58 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
diff --git a/dist/qt_themes/qdarkstyle/style.qss b/dist/qt_themes/qdarkstyle/style.qss
index 7d088a719..2d5c9761f 100644
--- a/dist/qt_themes/qdarkstyle/style.qss
+++ b/dist/qt_themes/qdarkstyle/style.qss
@@ -673,10 +673,6 @@ QTabWidget::pane {
673 border-bottom-left-radius: 2px; 673 border-bottom-left-radius: 2px;
674} 674}
675 675
676QTabWidget::tab-bar {
677 overflow: visible;
678}
679
680QTabBar { 676QTabBar {
681 qproperty-drawBase: 0; 677 qproperty-drawBase: 0;
682 border-radius: 3px; 678 border-radius: 3px;
diff --git a/dist/yuzu.manifest b/dist/yuzu.manifest
index fd30b656f..038edff23 100644
--- a/dist/yuzu.manifest
+++ b/dist/yuzu.manifest
@@ -1,24 +1,58 @@
1<?xml version="1.0" encoding="UTF-8" standalone="yes"?> 1<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
2<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0"> 2<assembly manifestVersion="1.0"
3 <trustInfo xmlns="urn:schemas-microsoft-com:asm.v3"> 3 xmlns="urn:schemas-microsoft-com:asm.v1"
4 <security> 4 xmlns:asmv3="urn:schemas-microsoft-com:asm.v3">
5 <requestedPrivileges> 5 <asmv3:application>
6 <requestedExecutionLevel level="asInvoker" uiAccess="false"/> 6 <asmv3:windowsSettings>
7 </requestedPrivileges> 7 <!-- Windows 7/8/8.1/10 -->
8 </security> 8 <dpiAware
9 </trustInfo> 9 xmlns="http://schemas.microsoft.com/SMI/2005/WindowsSettings">
10 <application xmlns="urn:schemas-microsoft-com:asm.v3"> 10 true/pm
11 <windowsSettings> 11 </dpiAware>
12 <dpiAware xmlns="http://schemas.microsoft.com/SMI/2005/WindowsSettings">True/PM</dpiAware> 12 <!-- Windows 10, version 1607 or later -->
13 <longPathAware xmlns="http://schemas.microsoft.com/SMI/2016/WindowsSettings">true</longPathAware> 13 <dpiAwareness
14 </windowsSettings> 14 xmlns="http://schemas.microsoft.com/SMI/2016/WindowsSettings">
15 </application> 15 PerMonitorV2
16 <compatibility xmlns="urn:schemas-microsoft-com:compatibility.v1"> 16 </dpiAwareness>
17 <application> 17 <!-- Windows 10, version 1703 or later -->
18 <supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/> 18 <gdiScaling
19 <supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/> 19 xmlns="http://schemas.microsoft.com/SMI/2017/WindowsSettings">
20 <supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/> 20 true
21 <supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/> 21 </gdiScaling>
22 </application> 22 <ws2:longPathAware
23 </compatibility> 23 xmlns:ws3="http://schemas.microsoft.com/SMI/2016/WindowsSettings">
24</assembly> \ No newline at end of file 24 true
25 </ws2:longPathAware>
26 </asmv3:windowsSettings>
27 </asmv3:application>
28 <compatibility
29 xmlns="urn:schemas-microsoft-com:compatibility.v1">
30 <application>
31 <!-- Windows 10 -->
32 <supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
33 <!-- Windows 8.1 -->
34 <supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/>
35 <!-- Windows 8 -->
36 <supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/>
37 <!-- Windows 7 -->
38 <supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/>
39 </application>
40 </compatibility>
41 <trustInfo
42 xmlns="urn:schemas-microsoft-com:asm.v3">
43 <security>
44 <requestedPrivileges>
45 <!--
46 UAC settings:
47 - app should run at same integrity level as calling process
48 - app does not need to manipulate windows belonging to
49 higher-integrity-level processes
50 -->
51 <requestedExecutionLevel
52 level="asInvoker"
53 uiAccess="false"
54 />
55 </requestedPrivileges>
56 </security>
57 </trustInfo>
58</assembly>
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index df7a5e0a9..b80b27605 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -4,6 +4,13 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")
4list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/externals/find-modules") 4list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/externals/find-modules")
5include(DownloadExternals) 5include(DownloadExternals)
6 6
7# xbyak
8if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
9 add_library(xbyak INTERFACE)
10 target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
11 target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
12endif()
13
7# Catch 14# Catch
8add_library(catch-single-include INTERFACE) 15add_library(catch-single-include INTERFACE)
9target_include_directories(catch-single-include INTERFACE catch/single_include) 16target_include_directories(catch-single-include INTERFACE catch/single_include)
@@ -66,6 +73,15 @@ if (NOT LIBZIP_FOUND)
66endif() 73endif()
67 74
68if (ENABLE_WEB_SERVICE) 75if (ENABLE_WEB_SERVICE)
76 # LibreSSL
77 set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "")
78 add_subdirectory(libressl EXCLUDE_FROM_ALL)
79 target_include_directories(ssl INTERFACE ./libressl/include)
80 target_compile_definitions(ssl PRIVATE -DHAVE_INET_NTOP)
81 get_directory_property(OPENSSL_LIBRARIES
82 DIRECTORY libressl
83 DEFINITION OPENSSL_LIBS)
84
69 # lurlparser 85 # lurlparser
70 add_subdirectory(lurlparser EXCLUDE_FROM_ALL) 86 add_subdirectory(lurlparser EXCLUDE_FROM_ALL)
71 87
@@ -73,13 +89,5 @@ if (ENABLE_WEB_SERVICE)
73 add_library(httplib INTERFACE) 89 add_library(httplib INTERFACE)
74 target_include_directories(httplib INTERFACE ./httplib) 90 target_include_directories(httplib INTERFACE ./httplib)
75 target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT) 91 target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT)
76 target_link_libraries(httplib INTERFACE OpenSSL::SSL OpenSSL::Crypto) 92 target_link_libraries(httplib INTERFACE ${OPENSSL_LIBRARIES})
77endif()
78
79if (NOT TARGET xbyak)
80 if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
81 add_library(xbyak INTERFACE)
82 target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
83 target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
84 endif()
85endif() 93endif()
diff --git a/externals/libressl b/externals/libressl
new file mode 160000
Subproject 7d01cb01cb1a926ecb4c9c98b107ef3c26f59df
diff --git a/externals/sirit b/externals/sirit
Subproject a62c5bbc100a5e5a31ea0ccc4a78d8fa6a4167c Subproject eefca56afd49379bdebc97ded8b480839f93088
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 50846a854..d64452617 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -180,11 +180,12 @@ ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector<
180 180
181 // Copy output header 181 // Copy output header
182 UpdateDataHeader response_data{worker_params}; 182 UpdateDataHeader response_data{worker_params};
183 std::vector<u8> output_params(response_data.total_size);
184 if (behavior_info.IsElapsedFrameCountSupported()) { 183 if (behavior_info.IsElapsedFrameCountSupported()) {
185 response_data.frame_count = 0x10; 184 response_data.render_info = sizeof(RendererInfo);
186 response_data.total_size += 0x10; 185 response_data.total_size += sizeof(RendererInfo);
187 } 186 }
187
188 std::vector<u8> output_params(response_data.total_size);
188 std::memcpy(output_params.data(), &response_data, sizeof(UpdateDataHeader)); 189 std::memcpy(output_params.data(), &response_data, sizeof(UpdateDataHeader));
189 190
190 // Copy output memory pool entries 191 // Copy output memory pool entries
@@ -219,6 +220,17 @@ ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector<
219 return Audren::ERR_INVALID_PARAMETERS; 220 return Audren::ERR_INVALID_PARAMETERS;
220 } 221 }
221 222
223 if (behavior_info.IsElapsedFrameCountSupported()) {
224 const std::size_t renderer_info_offset{
225 sizeof(UpdateDataHeader) + response_data.memory_pools_size + response_data.voices_size +
226 response_data.effects_size + response_data.sinks_size +
227 response_data.performance_manager_size + response_data.behavior_size};
228 RendererInfo renderer_info{};
229 renderer_info.elasped_frame_count = elapsed_frame_count;
230 std::memcpy(output_params.data() + renderer_info_offset, &renderer_info,
231 sizeof(RendererInfo));
232 }
233
222 return MakeResult(output_params); 234 return MakeResult(output_params);
223} 235}
224 236
@@ -447,6 +459,7 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
447 } 459 }
448 } 460 }
449 audio_out->QueueBuffer(stream, tag, std::move(buffer)); 461 audio_out->QueueBuffer(stream, tag, std::move(buffer));
462 elapsed_frame_count++;
450} 463}
451 464
452void AudioRenderer::ReleaseAndQueueBuffers() { 465void AudioRenderer::ReleaseAndQueueBuffers() {
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 1f9114c07..f0b691a86 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -196,6 +196,12 @@ struct EffectOutStatus {
196}; 196};
197static_assert(sizeof(EffectOutStatus) == 0x10, "EffectOutStatus is an invalid size"); 197static_assert(sizeof(EffectOutStatus) == 0x10, "EffectOutStatus is an invalid size");
198 198
199struct RendererInfo {
200 u64_le elasped_frame_count{};
201 INSERT_PADDING_WORDS(2);
202};
203static_assert(sizeof(RendererInfo) == 0x10, "RendererInfo is an invalid size");
204
199struct UpdateDataHeader { 205struct UpdateDataHeader {
200 UpdateDataHeader() {} 206 UpdateDataHeader() {}
201 207
@@ -209,7 +215,7 @@ struct UpdateDataHeader {
209 mixes_size = 0x0; 215 mixes_size = 0x0;
210 sinks_size = config.sink_count * 0x20; 216 sinks_size = config.sink_count * 0x20;
211 performance_manager_size = 0x10; 217 performance_manager_size = 0x10;
212 frame_count = 0; 218 render_info = 0;
213 total_size = sizeof(UpdateDataHeader) + behavior_size + memory_pools_size + voices_size + 219 total_size = sizeof(UpdateDataHeader) + behavior_size + memory_pools_size + voices_size +
214 effects_size + sinks_size + performance_manager_size; 220 effects_size + sinks_size + performance_manager_size;
215 } 221 }
@@ -223,8 +229,8 @@ struct UpdateDataHeader {
223 u32_le mixes_size{}; 229 u32_le mixes_size{};
224 u32_le sinks_size{}; 230 u32_le sinks_size{};
225 u32_le performance_manager_size{}; 231 u32_le performance_manager_size{};
226 INSERT_PADDING_WORDS(1); 232 u32_le splitter_size{};
227 u32_le frame_count{}; 233 u32_le render_info{};
228 INSERT_PADDING_WORDS(4); 234 INSERT_PADDING_WORDS(4);
229 u32_le total_size{}; 235 u32_le total_size{};
230}; 236};
@@ -258,6 +264,7 @@ private:
258 std::unique_ptr<AudioOut> audio_out; 264 std::unique_ptr<AudioOut> audio_out;
259 StreamPtr stream; 265 StreamPtr stream;
260 Core::Memory::Memory& memory; 266 Core::Memory::Memory& memory;
267 std::size_t elapsed_frame_count{};
261}; 268};
262 269
263} // namespace AudioCore 270} // namespace AudioCore
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index 4ca98f8ea..ca7cfb030 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -67,7 +67,7 @@ s64 Stream::GetBufferReleaseCycles(const Buffer& buffer) const {
67} 67}
68 68
69static void VolumeAdjustSamples(std::vector<s16>& samples, float game_volume) { 69static void VolumeAdjustSamples(std::vector<s16>& samples, float game_volume) {
70 const float volume{std::clamp(Settings::values.volume - (1.0f - game_volume), 0.0f, 1.0f)}; 70 const float volume{std::clamp(Settings::Volume() - (1.0f - game_volume), 0.0f, 1.0f)};
71 71
72 if (volume == 1.0f) { 72 if (volume == 1.0f) {
73 return; 73 return;
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 24b7a083c..3cc17d0e9 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -32,6 +32,8 @@ add_custom_command(OUTPUT scm_rev.cpp
32 DEPENDS 32 DEPENDS
33 # WARNING! It was too much work to try and make a common location for this list, 33 # WARNING! It was too much work to try and make a common location for this list,
34 # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well 34 # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
35 "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
36 "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
35 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp" 37 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
36 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h" 38 "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
37 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp" 39 "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
@@ -108,6 +110,8 @@ add_library(common STATIC
108 common_types.h 110 common_types.h
109 dynamic_library.cpp 111 dynamic_library.cpp
110 dynamic_library.h 112 dynamic_library.h
113 fiber.cpp
114 fiber.h
111 file_util.cpp 115 file_util.cpp
112 file_util.h 116 file_util.h
113 hash.h 117 hash.h
@@ -141,6 +145,8 @@ add_library(common STATIC
141 scm_rev.cpp 145 scm_rev.cpp
142 scm_rev.h 146 scm_rev.h
143 scope_exit.h 147 scope_exit.h
148 spin_lock.cpp
149 spin_lock.h
144 string_util.cpp 150 string_util.cpp
145 string_util.h 151 string_util.h
146 swap.h 152 swap.h
@@ -161,6 +167,8 @@ add_library(common STATIC
161 vector_math.h 167 vector_math.h
162 virtual_buffer.cpp 168 virtual_buffer.cpp
163 virtual_buffer.h 169 virtual_buffer.h
170 wall_clock.cpp
171 wall_clock.h
164 web_result.h 172 web_result.h
165 zstd_compression.cpp 173 zstd_compression.cpp
166 zstd_compression.h 174 zstd_compression.h
@@ -171,12 +179,15 @@ if(ARCHITECTURE_x86_64)
171 PRIVATE 179 PRIVATE
172 x64/cpu_detect.cpp 180 x64/cpu_detect.cpp
173 x64/cpu_detect.h 181 x64/cpu_detect.h
182 x64/native_clock.cpp
183 x64/native_clock.h
174 x64/xbyak_abi.h 184 x64/xbyak_abi.h
175 x64/xbyak_util.h 185 x64/xbyak_util.h
176 ) 186 )
177endif() 187endif()
178 188
179create_target_directory_groups(common) 189create_target_directory_groups(common)
190find_package(Boost 1.71 COMPONENTS context headers REQUIRED)
180 191
181target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile) 192target_link_libraries(common PUBLIC ${Boost_LIBRARIES} fmt::fmt microprofile)
182target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak) 193target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak)
diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp
new file mode 100644
index 000000000..f97ad433b
--- /dev/null
+++ b/src/common/fiber.cpp
@@ -0,0 +1,226 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/fiber.h"
7#if defined(_WIN32) || defined(WIN32)
8#include <windows.h>
9#else
10#include <boost/context/detail/fcontext.hpp>
11#endif
12
13namespace Common {
14
15constexpr std::size_t default_stack_size = 256 * 1024; // 256kb
16
17#if defined(_WIN32) || defined(WIN32)
18
19struct Fiber::FiberImpl {
20 LPVOID handle = nullptr;
21 LPVOID rewind_handle = nullptr;
22};
23
24void Fiber::Start() {
25 ASSERT(previous_fiber != nullptr);
26 previous_fiber->guard.unlock();
27 previous_fiber.reset();
28 entry_point(start_parameter);
29 UNREACHABLE();
30}
31
32void Fiber::OnRewind() {
33 ASSERT(impl->handle != nullptr);
34 DeleteFiber(impl->handle);
35 impl->handle = impl->rewind_handle;
36 impl->rewind_handle = nullptr;
37 rewind_point(rewind_parameter);
38 UNREACHABLE();
39}
40
41void Fiber::FiberStartFunc(void* fiber_parameter) {
42 auto fiber = static_cast<Fiber*>(fiber_parameter);
43 fiber->Start();
44}
45
46void Fiber::RewindStartFunc(void* fiber_parameter) {
47 auto fiber = static_cast<Fiber*>(fiber_parameter);
48 fiber->OnRewind();
49}
50
51Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
52 : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} {
53 impl = std::make_unique<FiberImpl>();
54 impl->handle = CreateFiber(default_stack_size, &FiberStartFunc, this);
55}
56
57Fiber::Fiber() {
58 impl = std::make_unique<FiberImpl>();
59}
60
61Fiber::~Fiber() {
62 if (released) {
63 return;
64 }
65 // Make sure the Fiber is not being used
66 const bool locked = guard.try_lock();
67 ASSERT_MSG(locked, "Destroying a fiber that's still running");
68 if (locked) {
69 guard.unlock();
70 }
71 DeleteFiber(impl->handle);
72}
73
74void Fiber::Exit() {
75 ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber");
76 if (!is_thread_fiber) {
77 return;
78 }
79 ConvertFiberToThread();
80 guard.unlock();
81 released = true;
82}
83
84void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
85 rewind_point = std::move(rewind_func);
86 rewind_parameter = start_parameter;
87}
88
89void Fiber::Rewind() {
90 ASSERT(rewind_point);
91 ASSERT(impl->rewind_handle == nullptr);
92 impl->rewind_handle = CreateFiber(default_stack_size, &RewindStartFunc, this);
93 SwitchToFiber(impl->rewind_handle);
94}
95
96void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) {
97 ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
98 ASSERT_MSG(to != nullptr, "Next fiber is null!");
99 to->guard.lock();
100 to->previous_fiber = from;
101 SwitchToFiber(to->impl->handle);
102 ASSERT(from->previous_fiber != nullptr);
103 from->previous_fiber->guard.unlock();
104 from->previous_fiber.reset();
105}
106
107std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
108 std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()};
109 fiber->guard.lock();
110 fiber->impl->handle = ConvertThreadToFiber(nullptr);
111 fiber->is_thread_fiber = true;
112 return fiber;
113}
114
115#else
116
117struct Fiber::FiberImpl {
118 alignas(64) std::array<u8, default_stack_size> stack;
119 u8* stack_limit;
120 alignas(64) std::array<u8, default_stack_size> rewind_stack;
121 u8* rewind_stack_limit;
122 boost::context::detail::fcontext_t context;
123 boost::context::detail::fcontext_t rewind_context;
124};
125
126void Fiber::Start(boost::context::detail::transfer_t& transfer) {
127 ASSERT(previous_fiber != nullptr);
128 previous_fiber->impl->context = transfer.fctx;
129 previous_fiber->guard.unlock();
130 previous_fiber.reset();
131 entry_point(start_parameter);
132 UNREACHABLE();
133}
134
135void Fiber::OnRewind([[maybe_unused]] boost::context::detail::transfer_t& transfer) {
136 ASSERT(impl->context != nullptr);
137 impl->context = impl->rewind_context;
138 impl->rewind_context = nullptr;
139 u8* tmp = impl->stack_limit;
140 impl->stack_limit = impl->rewind_stack_limit;
141 impl->rewind_stack_limit = tmp;
142 rewind_point(rewind_parameter);
143 UNREACHABLE();
144}
145
146void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) {
147 auto fiber = static_cast<Fiber*>(transfer.data);
148 fiber->Start(transfer);
149}
150
151void Fiber::RewindStartFunc(boost::context::detail::transfer_t transfer) {
152 auto fiber = static_cast<Fiber*>(transfer.data);
153 fiber->OnRewind(transfer);
154}
155
156Fiber::Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter)
157 : entry_point{std::move(entry_point_func)}, start_parameter{start_parameter} {
158 impl = std::make_unique<FiberImpl>();
159 impl->stack_limit = impl->stack.data();
160 impl->rewind_stack_limit = impl->rewind_stack.data();
161 u8* stack_base = impl->stack_limit + default_stack_size;
162 impl->context =
163 boost::context::detail::make_fcontext(stack_base, impl->stack.size(), FiberStartFunc);
164}
165
166void Fiber::SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter) {
167 rewind_point = std::move(rewind_func);
168 rewind_parameter = start_parameter;
169}
170
171Fiber::Fiber() {
172 impl = std::make_unique<FiberImpl>();
173}
174
175Fiber::~Fiber() {
176 if (released) {
177 return;
178 }
179 // Make sure the Fiber is not being used
180 const bool locked = guard.try_lock();
181 ASSERT_MSG(locked, "Destroying a fiber that's still running");
182 if (locked) {
183 guard.unlock();
184 }
185}
186
187void Fiber::Exit() {
188
189 ASSERT_MSG(is_thread_fiber, "Exitting non main thread fiber");
190 if (!is_thread_fiber) {
191 return;
192 }
193 guard.unlock();
194 released = true;
195}
196
197void Fiber::Rewind() {
198 ASSERT(rewind_point);
199 ASSERT(impl->rewind_context == nullptr);
200 u8* stack_base = impl->rewind_stack_limit + default_stack_size;
201 impl->rewind_context =
202 boost::context::detail::make_fcontext(stack_base, impl->stack.size(), RewindStartFunc);
203 boost::context::detail::jump_fcontext(impl->rewind_context, this);
204}
205
206void Fiber::YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to) {
207 ASSERT_MSG(from != nullptr, "Yielding fiber is null!");
208 ASSERT_MSG(to != nullptr, "Next fiber is null!");
209 to->guard.lock();
210 to->previous_fiber = from;
211 auto transfer = boost::context::detail::jump_fcontext(to->impl->context, to.get());
212 ASSERT(from->previous_fiber != nullptr);
213 from->previous_fiber->impl->context = transfer.fctx;
214 from->previous_fiber->guard.unlock();
215 from->previous_fiber.reset();
216}
217
218std::shared_ptr<Fiber> Fiber::ThreadToFiber() {
219 std::shared_ptr<Fiber> fiber = std::shared_ptr<Fiber>{new Fiber()};
220 fiber->guard.lock();
221 fiber->is_thread_fiber = true;
222 return fiber;
223}
224
225#endif
226} // namespace Common
diff --git a/src/common/fiber.h b/src/common/fiber.h
new file mode 100644
index 000000000..dafc1100e
--- /dev/null
+++ b/src/common/fiber.h
@@ -0,0 +1,92 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <functional>
8#include <memory>
9
10#include "common/common_types.h"
11#include "common/spin_lock.h"
12
13#if !defined(_WIN32) && !defined(WIN32)
14namespace boost::context::detail {
15struct transfer_t;
16}
17#endif
18
19namespace Common {
20
21/**
22 * Fiber class
23 * a fiber is a userspace thread with it's own context. They can be used to
24 * implement coroutines, emulated threading systems and certain asynchronous
25 * patterns.
26 *
27 * This class implements fibers at a low level, thus allowing greater freedom
28 * to implement such patterns. This fiber class is 'threadsafe' only one fiber
29 * can be running at a time and threads will be locked while trying to yield to
30 * a running fiber until it yields. WARNING exchanging two running fibers between
31 * threads will cause a deadlock. In order to prevent a deadlock, each thread should
32 * have an intermediary fiber, you switch to the intermediary fiber of the current
33 * thread and then from it switch to the expected fiber. This way you can exchange
34 * 2 fibers within 2 different threads.
35 */
36class Fiber {
37public:
38 Fiber(std::function<void(void*)>&& entry_point_func, void* start_parameter);
39 ~Fiber();
40
41 Fiber(const Fiber&) = delete;
42 Fiber& operator=(const Fiber&) = delete;
43
44 Fiber(Fiber&&) = default;
45 Fiber& operator=(Fiber&&) = default;
46
47 /// Yields control from Fiber 'from' to Fiber 'to'
48 /// Fiber 'from' must be the currently running fiber.
49 static void YieldTo(std::shared_ptr<Fiber>& from, std::shared_ptr<Fiber>& to);
50 static std::shared_ptr<Fiber> ThreadToFiber();
51
52 void SetRewindPoint(std::function<void(void*)>&& rewind_func, void* start_parameter);
53
54 void Rewind();
55
56 /// Only call from main thread's fiber
57 void Exit();
58
59 /// Changes the start parameter of the fiber. Has no effect if the fiber already started
60 void SetStartParameter(void* new_parameter) {
61 start_parameter = new_parameter;
62 }
63
64private:
65 Fiber();
66
67#if defined(_WIN32) || defined(WIN32)
68 void OnRewind();
69 void Start();
70 static void FiberStartFunc(void* fiber_parameter);
71 static void RewindStartFunc(void* fiber_parameter);
72#else
73 void OnRewind(boost::context::detail::transfer_t& transfer);
74 void Start(boost::context::detail::transfer_t& transfer);
75 static void FiberStartFunc(boost::context::detail::transfer_t transfer);
76 static void RewindStartFunc(boost::context::detail::transfer_t transfer);
77#endif
78
79 struct FiberImpl;
80
81 SpinLock guard{};
82 std::function<void(void*)> entry_point;
83 std::function<void(void*)> rewind_point;
84 void* rewind_parameter{};
85 void* start_parameter{};
86 std::shared_ptr<Fiber> previous_fiber;
87 std::unique_ptr<FiberImpl> impl;
88 bool is_thread_fiber{};
89 bool released{};
90};
91
92} // namespace Common
diff --git a/src/common/spin_lock.cpp b/src/common/spin_lock.cpp
new file mode 100644
index 000000000..c7b46aac6
--- /dev/null
+++ b/src/common/spin_lock.cpp
@@ -0,0 +1,54 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/spin_lock.h"
6
7#if _MSC_VER
8#include <intrin.h>
9#if _M_AMD64
10#define __x86_64__ 1
11#endif
12#if _M_ARM64
13#define __aarch64__ 1
14#endif
15#else
16#if __x86_64__
17#include <xmmintrin.h>
18#endif
19#endif
20
namespace {

// Emits a spin-wait hint so the CPU backs off while busy-waiting:
// 'pause' on x86-64, 'yield' on AArch64 (intrinsic under MSVC, inline asm
// elsewhere). Compiles to nothing on other architectures.
void thread_pause() {
#if __x86_64__
    _mm_pause();
#elif __aarch64__ && _MSC_VER
    __yield();
#elif __aarch64__
    asm("yield");
#endif
}

} // namespace
34
35namespace Common {
36
37void SpinLock::lock() {
38 while (lck.test_and_set(std::memory_order_acquire)) {
39 thread_pause();
40 }
41}
42
void SpinLock::unlock() {
    // Release ordering publishes all writes made inside the critical section
    // to the next thread that acquires the lock.
    lck.clear(std::memory_order_release);
}
46
47bool SpinLock::try_lock() {
48 if (lck.test_and_set(std::memory_order_acquire)) {
49 return false;
50 }
51 return true;
52}
53
54} // namespace Common
diff --git a/src/common/spin_lock.h b/src/common/spin_lock.h
new file mode 100644
index 000000000..70282a961
--- /dev/null
+++ b/src/common/spin_lock.h
@@ -0,0 +1,21 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <atomic>
8
9namespace Common {
10
// Busy-wait mutual-exclusion primitive built on std::atomic_flag.
// Satisfies the Lockable requirements (lock/unlock/try_lock), so it works
// with std::scoped_lock and std::unique_lock. Intended for very short
// critical sections where blocking on a std::mutex would cost more.
class SpinLock {
public:
    void lock();
    void unlock();
    bool try_lock();

private:
    std::atomic_flag lck = ATOMIC_FLAG_INIT;
};
20
21} // namespace Common
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp
index 200c6489a..16d42facd 100644
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
@@ -60,6 +60,7 @@ void AppendCPUInfo(FieldCollection& fc) {
60 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes); 60 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
61 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx); 61 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
62 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2); 62 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);
63 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX512", Common::GetCPUCaps().avx512);
63 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1); 64 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI1", Common::GetCPUCaps().bmi1);
64 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2); 65 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_BMI2", Common::GetCPUCaps().bmi2);
65 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma); 66 fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_FMA", Common::GetCPUCaps().fma);
diff --git a/src/common/thread.h b/src/common/thread.h
index 2fc071685..127cc7e23 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -9,6 +9,7 @@
9#include <cstddef> 9#include <cstddef>
10#include <mutex> 10#include <mutex>
11#include <thread> 11#include <thread>
12#include "common/common_types.h"
12 13
13namespace Common { 14namespace Common {
14 15
@@ -28,8 +29,7 @@ public:
28 is_set = false; 29 is_set = false;
29 } 30 }
30 31
31 template <class Duration> 32 bool WaitFor(const std::chrono::nanoseconds& time) {
32 bool WaitFor(const std::chrono::duration<Duration>& time) {
33 std::unique_lock lk{mutex}; 33 std::unique_lock lk{mutex};
34 if (!condvar.wait_for(lk, time, [this] { return is_set; })) 34 if (!condvar.wait_for(lk, time, [this] { return is_set; }))
35 return false; 35 return false;
diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
index 32bf56730..16bf7c828 100644
--- a/src/common/uint128.cpp
+++ b/src/common/uint128.cpp
@@ -6,12 +6,38 @@
6#include <intrin.h> 6#include <intrin.h>
7 7
8#pragma intrinsic(_umul128) 8#pragma intrinsic(_umul128)
9#pragma intrinsic(_udiv128)
9#endif 10#endif
10#include <cstring> 11#include <cstring>
11#include "common/uint128.h" 12#include "common/uint128.h"
12 13
13namespace Common { 14namespace Common {
14 15
#ifdef _MSC_VER

// Computes (a * b) / d with a full 128-bit intermediate product via MSVC
// intrinsics, so a * b may exceed 64 bits without losing precision.
u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
    u128 r{};
    r[0] = _umul128(a, b, &r[1]);
    u64 remainder;
#if _MSC_VER < 1923
    // Older MSVC lacks the _udiv128 intrinsic; fall back to a helper.
    // NOTE(review): 'udiv128' is presumably declared elsewhere in this
    // translation unit -- confirm it is visible here.
    return udiv128(r[1], r[0], d, &remainder);
#else
    return _udiv128(r[1], r[0], d, &remainder);
#endif
}

#else

// Computes (a * b) / d without 128-bit intrinsics by splitting
// a = diva*d + moda and b = divb*d + modb, so that
//   a*b/d = diva*b + moda*divb + (moda*modb)/d.
// NOTE(review): moda * modb can still overflow u64 when d is large relative
// to 2^32 -- callers are expected to keep the divisor small; confirm.
u64 MultiplyAndDivide64(u64 a, u64 b, u64 d) {
    const u64 diva = a / d;
    const u64 moda = a % d;
    const u64 divb = b / d;
    const u64 modb = b % d;
    return diva * b + moda * divb + moda * modb / d;
}

#endif
40
15u128 Multiply64Into128(u64 a, u64 b) { 41u128 Multiply64Into128(u64 a, u64 b) {
16 u128 result; 42 u128 result;
17#ifdef _MSC_VER 43#ifdef _MSC_VER
diff --git a/src/common/uint128.h b/src/common/uint128.h
index a3be2a2cb..503cd2d0c 100644
--- a/src/common/uint128.h
+++ b/src/common/uint128.h
@@ -9,6 +9,9 @@
9 9
10namespace Common { 10namespace Common {
11 11
12// This function multiplies 2 u64 values and divides it by a u64 value.
13u64 MultiplyAndDivide64(u64 a, u64 b, u64 d);
14
12// This function multiplies 2 u64 values and produces a u128 value; 15// This function multiplies 2 u64 values and produces a u128 value;
13u128 Multiply64Into128(u64 a, u64 b); 16u128 Multiply64Into128(u64 a, u64 b);
14 17
diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp
new file mode 100644
index 000000000..d4d35f4e7
--- /dev/null
+++ b/src/common/wall_clock.cpp
@@ -0,0 +1,92 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/uint128.h"
6#include "common/wall_clock.h"
7
8#ifdef ARCHITECTURE_x86_64
9#include "common/x64/cpu_detect.h"
10#include "common/x64/native_clock.h"
11#endif
12
13namespace Common {
14
15using base_timer = std::chrono::steady_clock;
16using base_time_point = std::chrono::time_point<base_timer>;
17
18class StandardWallClock : public WallClock {
19public:
20 StandardWallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency)
21 : WallClock(emulated_cpu_frequency, emulated_clock_frequency, false) {
22 start_time = base_timer::now();
23 }
24
25 std::chrono::nanoseconds GetTimeNS() override {
26 base_time_point current = base_timer::now();
27 auto elapsed = current - start_time;
28 return std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed);
29 }
30
31 std::chrono::microseconds GetTimeUS() override {
32 base_time_point current = base_timer::now();
33 auto elapsed = current - start_time;
34 return std::chrono::duration_cast<std::chrono::microseconds>(elapsed);
35 }
36
37 std::chrono::milliseconds GetTimeMS() override {
38 base_time_point current = base_timer::now();
39 auto elapsed = current - start_time;
40 return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed);
41 }
42
43 u64 GetClockCycles() override {
44 std::chrono::nanoseconds time_now = GetTimeNS();
45 const u128 temporary =
46 Common::Multiply64Into128(time_now.count(), emulated_clock_frequency);
47 return Common::Divide128On32(temporary, 1000000000).first;
48 }
49
50 u64 GetCPUCycles() override {
51 std::chrono::nanoseconds time_now = GetTimeNS();
52 const u128 temporary = Common::Multiply64Into128(time_now.count(), emulated_cpu_frequency);
53 return Common::Divide128On32(temporary, 1000000000).first;
54 }
55
56private:
57 base_time_point start_time;
58};
59
#ifdef ARCHITECTURE_x86_64

// Prefers a TSC-backed NativeClock when the CPU advertises an invariant TSC;
// otherwise falls back to the portable std::chrono-based StandardWallClock.
std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
                                                   u32 emulated_clock_frequency) {
    const auto& caps = GetCPUCaps();
    u64 rtsc_frequency = 0;
    if (caps.invariant_tsc) {
        // base_frequency is reported in MHz (see the *1'000'000 below).
        if (caps.base_frequency != 0) {
            rtsc_frequency = static_cast<u64>(caps.base_frequency) * 1000000U;
        }
        // CPUID gave no usable value: measure the TSC rate empirically.
        if (rtsc_frequency == 0) {
            rtsc_frequency = EstimateRDTSCFrequency();
        }
    }
    if (rtsc_frequency == 0) {
        return std::make_unique<StandardWallClock>(emulated_cpu_frequency,
                                                   emulated_clock_frequency);
    } else {
        return std::make_unique<X64::NativeClock>(emulated_cpu_frequency, emulated_clock_frequency,
                                                  rtsc_frequency);
    }
}

#else

// Non-x86-64 hosts always use the portable std::chrono implementation.
std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
                                                   u32 emulated_clock_frequency) {
    return std::make_unique<StandardWallClock>(emulated_cpu_frequency, emulated_clock_frequency);
}

#endif
91
92} // namespace Common
diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h
new file mode 100644
index 000000000..ed284cf50
--- /dev/null
+++ b/src/common/wall_clock.h
@@ -0,0 +1,51 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <chrono>
8#include <memory>
9
10#include "common/common_types.h"
11
12namespace Common {
13
14class WallClock {
15public:
16 /// Returns current wall time in nanoseconds
17 virtual std::chrono::nanoseconds GetTimeNS() = 0;
18
19 /// Returns current wall time in microseconds
20 virtual std::chrono::microseconds GetTimeUS() = 0;
21
22 /// Returns current wall time in milliseconds
23 virtual std::chrono::milliseconds GetTimeMS() = 0;
24
25 /// Returns current wall time in emulated clock cycles
26 virtual u64 GetClockCycles() = 0;
27
28 /// Returns current wall time in emulated cpu cycles
29 virtual u64 GetCPUCycles() = 0;
30
31 /// Tells if the wall clock, uses the host CPU's hardware clock
32 bool IsNative() const {
33 return is_native;
34 }
35
36protected:
37 WallClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, bool is_native)
38 : emulated_cpu_frequency{emulated_cpu_frequency},
39 emulated_clock_frequency{emulated_clock_frequency}, is_native{is_native} {}
40
41 u64 emulated_cpu_frequency;
42 u64 emulated_clock_frequency;
43
44private:
45 bool is_native;
46};
47
48std::unique_ptr<WallClock> CreateBestMatchingClock(u32 emulated_cpu_frequency,
49 u32 emulated_clock_frequency);
50
51} // namespace Common
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index c9349a6b4..fccd2eee5 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -62,6 +62,17 @@ static CPUCaps Detect() {
62 std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int)); 62 std::memcpy(&caps.brand_string[0], &cpu_id[1], sizeof(int));
63 std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int)); 63 std::memcpy(&caps.brand_string[4], &cpu_id[3], sizeof(int));
64 std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int)); 64 std::memcpy(&caps.brand_string[8], &cpu_id[2], sizeof(int));
65 if (cpu_id[1] == 0x756e6547 && cpu_id[2] == 0x6c65746e && cpu_id[3] == 0x49656e69)
66 caps.manufacturer = Manufacturer::Intel;
67 else if (cpu_id[1] == 0x68747541 && cpu_id[2] == 0x444d4163 && cpu_id[3] == 0x69746e65)
68 caps.manufacturer = Manufacturer::AMD;
69 else if (cpu_id[1] == 0x6f677948 && cpu_id[2] == 0x656e6975 && cpu_id[3] == 0x6e65476e)
70 caps.manufacturer = Manufacturer::Hygon;
71 else
72 caps.manufacturer = Manufacturer::Unknown;
73
74 u32 family = {};
75 u32 model = {};
65 76
66 __cpuid(cpu_id, 0x80000000); 77 __cpuid(cpu_id, 0x80000000);
67 78
@@ -73,6 +84,14 @@ static CPUCaps Detect() {
73 // Detect family and other miscellaneous features 84 // Detect family and other miscellaneous features
74 if (max_std_fn >= 1) { 85 if (max_std_fn >= 1) {
75 __cpuid(cpu_id, 0x00000001); 86 __cpuid(cpu_id, 0x00000001);
87 family = (cpu_id[0] >> 8) & 0xf;
88 model = (cpu_id[0] >> 4) & 0xf;
89 if (family == 0xf) {
90 family += (cpu_id[0] >> 20) & 0xff;
91 }
92 if (family >= 6) {
93 model += ((cpu_id[0] >> 16) & 0xf) << 4;
94 }
76 95
77 if ((cpu_id[3] >> 25) & 1) 96 if ((cpu_id[3] >> 25) & 1)
78 caps.sse = true; 97 caps.sse = true;
@@ -110,6 +129,11 @@ static CPUCaps Detect() {
110 caps.bmi1 = true; 129 caps.bmi1 = true;
111 if ((cpu_id[1] >> 8) & 1) 130 if ((cpu_id[1] >> 8) & 1)
112 caps.bmi2 = true; 131 caps.bmi2 = true;
132 // Checks for AVX512F, AVX512CD, AVX512VL, AVX512DQ, AVX512BW (Intel Skylake-X/SP)
133 if ((cpu_id[1] >> 16) & 1 && (cpu_id[1] >> 28) & 1 && (cpu_id[1] >> 31) & 1 &&
134 (cpu_id[1] >> 17) & 1 && (cpu_id[1] >> 30) & 1) {
135 caps.avx512 = caps.avx2;
136 }
113 } 137 }
114 } 138 }
115 139
@@ -130,6 +154,20 @@ static CPUCaps Detect() {
130 caps.fma4 = true; 154 caps.fma4 = true;
131 } 155 }
132 156
157 if (max_ex_fn >= 0x80000007) {
158 __cpuid(cpu_id, 0x80000007);
159 if (cpu_id[3] & (1 << 8)) {
160 caps.invariant_tsc = true;
161 }
162 }
163
164 if (max_std_fn >= 0x16) {
165 __cpuid(cpu_id, 0x16);
166 caps.base_frequency = cpu_id[0];
167 caps.max_frequency = cpu_id[1];
168 caps.bus_frequency = cpu_id[2];
169 }
170
133 return caps; 171 return caps;
134} 172}
135 173
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 20f2ba234..e3b63302e 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -6,8 +6,16 @@
6 6
7namespace Common { 7namespace Common {
8 8
9enum class Manufacturer : u32 {
10 Intel = 0,
11 AMD = 1,
12 Hygon = 2,
13 Unknown = 3,
14};
15
9/// x86/x64 CPU capabilities that may be detected by this module 16/// x86/x64 CPU capabilities that may be detected by this module
10struct CPUCaps { 17struct CPUCaps {
18 Manufacturer manufacturer;
11 char cpu_string[0x21]; 19 char cpu_string[0x21];
12 char brand_string[0x41]; 20 char brand_string[0x41];
13 bool sse; 21 bool sse;
@@ -19,11 +27,16 @@ struct CPUCaps {
19 bool lzcnt; 27 bool lzcnt;
20 bool avx; 28 bool avx;
21 bool avx2; 29 bool avx2;
30 bool avx512;
22 bool bmi1; 31 bool bmi1;
23 bool bmi2; 32 bool bmi2;
24 bool fma; 33 bool fma;
25 bool fma4; 34 bool fma4;
26 bool aes; 35 bool aes;
36 bool invariant_tsc;
37 u32 base_frequency;
38 u32 max_frequency;
39 u32 bus_frequency;
27}; 40};
28 41
29/** 42/**
diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
new file mode 100644
index 000000000..26d4d0ba6
--- /dev/null
+++ b/src/common/x64/native_clock.cpp
@@ -0,0 +1,95 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <chrono>
6#include <thread>
7
8#ifdef _MSC_VER
9#include <intrin.h>
10#else
11#include <x86intrin.h>
12#endif
13
14#include "common/uint128.h"
15#include "common/x64/native_clock.h"
16
17namespace Common {
18
19u64 EstimateRDTSCFrequency() {
20 const auto milli_10 = std::chrono::milliseconds{10};
21 // get current time
22 _mm_mfence();
23 const u64 tscStart = __rdtsc();
24 const auto startTime = std::chrono::high_resolution_clock::now();
25 // wait roughly 3 seconds
26 while (true) {
27 auto milli = std::chrono::duration_cast<std::chrono::milliseconds>(
28 std::chrono::high_resolution_clock::now() - startTime);
29 if (milli.count() >= 3000)
30 break;
31 std::this_thread::sleep_for(milli_10);
32 }
33 const auto endTime = std::chrono::high_resolution_clock::now();
34 _mm_mfence();
35 const u64 tscEnd = __rdtsc();
36 // calculate difference
37 const u64 timer_diff =
38 std::chrono::duration_cast<std::chrono::nanoseconds>(endTime - startTime).count();
39 const u64 tsc_diff = tscEnd - tscStart;
40 const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff);
41 return tsc_freq;
42}
43
44namespace X64 {
NativeClock::NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency,
                         u64 rtsc_frequency)
    : WallClock(emulated_cpu_frequency, emulated_clock_frequency, true), rtsc_frequency{
                                                                             rtsc_frequency} {
    // Take the baseline TSC sample; the fence keeps rdtsc from being
    // reordered with earlier memory operations.
    _mm_mfence();
    last_measure = __rdtsc();
    accumulated_ticks = 0U;
}
53
u64 NativeClock::GetRTSC() {
    // Returns a monotonically increasing tick count: only forward TSC deltas
    // are accumulated, so readings never go backwards even if the raw TSC
    // does (e.g. when sampled across cores).
    rtsc_serialize.lock();
    _mm_mfence();
    const u64 current_measure = __rdtsc();
    u64 diff = current_measure - last_measure;
    // Branchless clamp: if the subtraction wrapped (TSC went backwards), the
    // arithmetic shift fills the mask with the sign bit and the AND zeroes
    // the delta.
    diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
    if (current_measure > last_measure) {
        last_measure = current_measure;
    }
    accumulated_ticks += diff;
    rtsc_serialize.unlock();
    return accumulated_ticks;
}
67
68std::chrono::nanoseconds NativeClock::GetTimeNS() {
69 const u64 rtsc_value = GetRTSC();
70 return std::chrono::nanoseconds{MultiplyAndDivide64(rtsc_value, 1000000000, rtsc_frequency)};
71}
72
73std::chrono::microseconds NativeClock::GetTimeUS() {
74 const u64 rtsc_value = GetRTSC();
75 return std::chrono::microseconds{MultiplyAndDivide64(rtsc_value, 1000000, rtsc_frequency)};
76}
77
78std::chrono::milliseconds NativeClock::GetTimeMS() {
79 const u64 rtsc_value = GetRTSC();
80 return std::chrono::milliseconds{MultiplyAndDivide64(rtsc_value, 1000, rtsc_frequency)};
81}
82
83u64 NativeClock::GetClockCycles() {
84 const u64 rtsc_value = GetRTSC();
85 return MultiplyAndDivide64(rtsc_value, emulated_clock_frequency, rtsc_frequency);
86}
87
88u64 NativeClock::GetCPUCycles() {
89 const u64 rtsc_value = GetRTSC();
90 return MultiplyAndDivide64(rtsc_value, emulated_cpu_frequency, rtsc_frequency);
91}
92
93} // namespace X64
94
95} // namespace Common
diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h
new file mode 100644
index 000000000..b58cf9f5a
--- /dev/null
+++ b/src/common/x64/native_clock.h
@@ -0,0 +1,41 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8
9#include "common/spin_lock.h"
10#include "common/wall_clock.h"
11
12namespace Common {
13
14namespace X64 {
/// Wall clock backed by the host's invariant TSC (rdtsc).
class NativeClock : public WallClock {
public:
    /// @param rtsc_frequency Host TSC rate in Hz, from CPUID or measurement
    NativeClock(u64 emulated_cpu_frequency, u64 emulated_clock_frequency, u64 rtsc_frequency);

    std::chrono::nanoseconds GetTimeNS() override;

    std::chrono::microseconds GetTimeUS() override;

    std::chrono::milliseconds GetTimeMS() override;

    u64 GetClockCycles() override;

    u64 GetCPUCycles() override;

private:
    /// Returns the monotonic accumulated TSC tick count (guarded by rtsc_serialize)
    u64 GetRTSC();

    SpinLock rtsc_serialize{}; // guards last_measure and accumulated_ticks
    u64 last_measure{};        // last raw TSC sample observed
    u64 accumulated_ticks{};   // monotonic sum of forward TSC deltas
    u64 rtsc_frequency;        // host TSC ticks per second
};
37} // namespace X64
38
39u64 EstimateRDTSCFrequency();
40
41} // namespace Common
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
index 794da8a52..a5f5d4fc1 100644
--- a/src/common/x64/xbyak_abi.h
+++ b/src/common/x64/xbyak_abi.h
@@ -11,7 +11,7 @@
11 11
12namespace Common::X64 { 12namespace Common::X64 {
13 13
14inline int RegToIndex(const Xbyak::Reg& reg) { 14inline std::size_t RegToIndex(const Xbyak::Reg& reg) {
15 using Kind = Xbyak::Reg::Kind; 15 using Kind = Xbyak::Reg::Kind;
16 ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, 16 ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
17 "RegSet only support GPRs and XMM registers."); 17 "RegSet only support GPRs and XMM registers.");
@@ -19,17 +19,17 @@ inline int RegToIndex(const Xbyak::Reg& reg) {
19 return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); 19 return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
20} 20}
21 21
22inline Xbyak::Reg64 IndexToReg64(int reg_index) { 22inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) {
23 ASSERT(reg_index < 16); 23 ASSERT(reg_index < 16);
24 return Xbyak::Reg64(reg_index); 24 return Xbyak::Reg64(static_cast<int>(reg_index));
25} 25}
26 26
27inline Xbyak::Xmm IndexToXmm(int reg_index) { 27inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) {
28 ASSERT(reg_index >= 16 && reg_index < 32); 28 ASSERT(reg_index >= 16 && reg_index < 32);
29 return Xbyak::Xmm(reg_index - 16); 29 return Xbyak::Xmm(static_cast<int>(reg_index - 16));
30} 30}
31 31
32inline Xbyak::Reg IndexToReg(int reg_index) { 32inline Xbyak::Reg IndexToReg(std::size_t reg_index) {
33 if (reg_index < 16) { 33 if (reg_index < 16) {
34 return IndexToReg64(reg_index); 34 return IndexToReg64(reg_index);
35 } else { 35 } else {
@@ -151,9 +151,13 @@ constexpr size_t ABI_SHADOW_SPACE = 0;
151 151
152#endif 152#endif
153 153
154inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, 154struct ABIFrameInfo {
155 size_t needed_frame_size, s32* out_subtraction, 155 s32 subtraction;
156 s32* out_xmm_offset) { 156 s32 xmm_offset;
157};
158
159inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
160 size_t needed_frame_size) {
157 const auto count = (regs & ABI_ALL_GPRS).count(); 161 const auto count = (regs & ABI_ALL_GPRS).count();
158 rsp_alignment -= count * 8; 162 rsp_alignment -= count * 8;
159 size_t subtraction = 0; 163 size_t subtraction = 0;
@@ -170,33 +174,28 @@ inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
170 rsp_alignment -= subtraction; 174 rsp_alignment -= subtraction;
171 subtraction += rsp_alignment & 0xF; 175 subtraction += rsp_alignment & 0xF;
172 176
173 *out_subtraction = (s32)subtraction; 177 return ABIFrameInfo{static_cast<s32>(subtraction),
174 *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction); 178 static_cast<s32>(subtraction - xmm_base_subtraction)};
175} 179}
176 180
177inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, 181inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
178 size_t rsp_alignment, size_t needed_frame_size = 0) { 182 size_t rsp_alignment, size_t needed_frame_size = 0) {
179 s32 subtraction, xmm_offset; 183 auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
180 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); 184
181 for (std::size_t i = 0; i < regs.size(); ++i) { 185 for (std::size_t i = 0; i < regs.size(); ++i) {
182 if (regs[i] && ABI_ALL_GPRS[i]) { 186 if (regs[i] && ABI_ALL_GPRS[i]) {
183 code.push(IndexToReg64(static_cast<int>(i))); 187 code.push(IndexToReg64(i));
184 } 188 }
185 } 189 }
186 if (subtraction != 0) {
187 code.sub(code.rsp, subtraction);
188 }
189 190
190 for (int i = 0; i < regs.count(); i++) { 191 if (frame_info.subtraction != 0) {
191 if (regs.test(i) & ABI_ALL_GPRS.test(i)) { 192 code.sub(code.rsp, frame_info.subtraction);
192 code.push(IndexToReg64(i));
193 }
194 } 193 }
195 194
196 for (std::size_t i = 0; i < regs.size(); ++i) { 195 for (std::size_t i = 0; i < regs.size(); ++i) {
197 if (regs[i] && ABI_ALL_XMMS[i]) { 196 if (regs[i] && ABI_ALL_XMMS[i]) {
198 code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i))); 197 code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i));
199 xmm_offset += 0x10; 198 frame_info.xmm_offset += 0x10;
200 } 199 }
201 } 200 }
202 201
@@ -205,59 +204,23 @@ inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::b
205 204
206inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, 205inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
207 size_t rsp_alignment, size_t needed_frame_size = 0) { 206 size_t rsp_alignment, size_t needed_frame_size = 0) {
208 s32 subtraction, xmm_offset; 207 auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
209 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
210 208
211 for (std::size_t i = 0; i < regs.size(); ++i) { 209 for (std::size_t i = 0; i < regs.size(); ++i) {
212 if (regs[i] && ABI_ALL_XMMS[i]) { 210 if (regs[i] && ABI_ALL_XMMS[i]) {
213 code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]); 211 code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]);
214 xmm_offset += 0x10; 212 frame_info.xmm_offset += 0x10;
215 } 213 }
216 } 214 }
217 215
218 if (subtraction != 0) { 216 if (frame_info.subtraction != 0) {
219 code.add(code.rsp, subtraction); 217 code.add(code.rsp, frame_info.subtraction);
220 } 218 }
221 219
222 // GPRs need to be popped in reverse order 220 // GPRs need to be popped in reverse order
223 for (int i = 15; i >= 0; i--) { 221 for (std::size_t j = 0; j < regs.size(); ++j) {
224 if (regs[i]) { 222 const std::size_t i = regs.size() - j - 1;
225 code.pop(IndexToReg64(i));
226 }
227 }
228}
229
230inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
231 size_t rsp_alignment,
232 size_t needed_frame_size = 0) {
233 s32 subtraction, xmm_offset;
234 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
235
236 for (std::size_t i = 0; i < regs.size(); ++i) {
237 if (regs[i] && ABI_ALL_GPRS[i]) { 223 if (regs[i] && ABI_ALL_GPRS[i]) {
238 code.push(IndexToReg64(static_cast<int>(i)));
239 }
240 }
241
242 if (subtraction != 0) {
243 code.sub(code.rsp, subtraction);
244 }
245
246 return ABI_SHADOW_SPACE;
247}
248
249inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
250 size_t rsp_alignment, size_t needed_frame_size = 0) {
251 s32 subtraction, xmm_offset;
252 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
253
254 if (subtraction != 0) {
255 code.add(code.rsp, subtraction);
256 }
257
258 // GPRs need to be popped in reverse order
259 for (int i = 15; i >= 0; i--) {
260 if (regs[i]) {
261 code.pop(IndexToReg64(i)); 224 code.pop(IndexToReg64(i));
262 } 225 }
263 } 226 }
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 47418006b..efbad628f 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -547,6 +547,8 @@ add_library(core STATIC
547 hle/service/vi/vi_u.h 547 hle/service/vi/vi_u.h
548 hle/service/wlan/wlan.cpp 548 hle/service/wlan/wlan.cpp
549 hle/service/wlan/wlan.h 549 hle/service/wlan/wlan.h
550 host_timing.cpp
551 host_timing.h
550 loader/deconstructed_rom_directory.cpp 552 loader/deconstructed_rom_directory.cpp
551 loader/deconstructed_rom_directory.h 553 loader/deconstructed_rom_directory.h
552 loader/elf.cpp 554 loader/elf.cpp
@@ -606,11 +608,11 @@ endif()
606create_target_directory_groups(core) 608create_target_directory_groups(core)
607 609
608target_link_libraries(core PUBLIC common PRIVATE audio_core video_core) 610target_link_libraries(core PUBLIC common PRIVATE audio_core video_core)
609target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt::fmt nlohmann_json::nlohmann_json mbedtls Opus::Opus unicorn) 611target_link_libraries(core PUBLIC Boost::boost PRIVATE fmt::fmt nlohmann_json::nlohmann_json mbedtls Opus::Opus unicorn zip)
610 612
611if (YUZU_ENABLE_BOXCAT) 613if (YUZU_ENABLE_BOXCAT)
612 target_compile_definitions(core PRIVATE -DYUZU_ENABLE_BOXCAT) 614 target_compile_definitions(core PRIVATE -DYUZU_ENABLE_BOXCAT)
613 target_link_libraries(core PRIVATE httplib nlohmann_json::nlohmann_json zip) 615 target_link_libraries(core PRIVATE httplib nlohmann_json::nlohmann_json)
614endif() 616endif()
615 617
616if (ENABLE_WEB_SERVICE) 618if (ENABLE_WEB_SERVICE)
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
index 9bc86e3b9..4c8663d03 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp
@@ -50,7 +50,8 @@ public:
50 } 50 }
51 51
52 void InterpreterFallback(u32 pc, std::size_t num_instructions) override { 52 void InterpreterFallback(u32 pc, std::size_t num_instructions) override {
53 UNIMPLEMENTED(); 53 UNIMPLEMENTED_MSG("This should never happen, pc = {:08X}, code = {:08X}", pc,
54 MemoryReadCode(pc));
54 } 55 }
55 56
56 void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override { 57 void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
@@ -61,7 +62,7 @@ public:
61 case Dynarmic::A32::Exception::Breakpoint: 62 case Dynarmic::A32::Exception::Breakpoint:
62 break; 63 break;
63 } 64 }
64 LOG_CRITICAL(HW_GPU, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", 65 LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
65 static_cast<std::size_t>(exception), pc, MemoryReadCode(pc)); 66 static_cast<std::size_t>(exception), pc, MemoryReadCode(pc));
66 UNIMPLEMENTED(); 67 UNIMPLEMENTED();
67 } 68 }
@@ -89,8 +90,6 @@ public:
89 90
90 ARM_Dynarmic_32& parent; 91 ARM_Dynarmic_32& parent;
91 std::size_t num_interpreted_instructions{}; 92 std::size_t num_interpreted_instructions{};
92 u64 tpidrro_el0{};
93 u64 tpidr_el0{};
94}; 93};
95 94
96std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table, 95std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable& page_table,
@@ -99,7 +98,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable&
99 config.callbacks = cb.get(); 98 config.callbacks = cb.get();
100 // TODO(bunnei): Implement page table for 32-bit 99 // TODO(bunnei): Implement page table for 32-bit
101 // config.page_table = &page_table.pointers; 100 // config.page_table = &page_table.pointers;
102 config.coprocessors[15] = std::make_shared<DynarmicCP15>((u32*)&CP15_regs[0]); 101 config.coprocessors[15] = cp15;
103 config.define_unpredictable_behaviour = true; 102 config.define_unpredictable_behaviour = true;
104 return std::make_unique<Dynarmic::A32::Jit>(config); 103 return std::make_unique<Dynarmic::A32::Jit>(config);
105} 104}
@@ -112,13 +111,13 @@ void ARM_Dynarmic_32::Run() {
112} 111}
113 112
114void ARM_Dynarmic_32::Step() { 113void ARM_Dynarmic_32::Step() {
115 cb->InterpreterFallback(jit->Regs()[15], 1); 114 jit->Step();
116} 115}
117 116
118ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor, 117ARM_Dynarmic_32::ARM_Dynarmic_32(System& system, ExclusiveMonitor& exclusive_monitor,
119 std::size_t core_index) 118 std::size_t core_index)
120 : ARM_Interface{system}, 119 : ARM_Interface{system}, cb(std::make_unique<DynarmicCallbacks32>(*this)),
121 cb(std::make_unique<DynarmicCallbacks32>(*this)), core_index{core_index}, 120 cp15(std::make_shared<DynarmicCP15>(*this)), core_index{core_index},
122 exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {} 121 exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
123 122
124ARM_Dynarmic_32::~ARM_Dynarmic_32() = default; 123ARM_Dynarmic_32::~ARM_Dynarmic_32() = default;
@@ -154,19 +153,19 @@ void ARM_Dynarmic_32::SetPSTATE(u32 cpsr) {
154} 153}
155 154
156u64 ARM_Dynarmic_32::GetTlsAddress() const { 155u64 ARM_Dynarmic_32::GetTlsAddress() const {
157 return CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)]; 156 return cp15->uro;
158} 157}
159 158
160void ARM_Dynarmic_32::SetTlsAddress(VAddr address) { 159void ARM_Dynarmic_32::SetTlsAddress(VAddr address) {
161 CP15_regs[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)] = static_cast<u32>(address); 160 cp15->uro = static_cast<u32>(address);
162} 161}
163 162
164u64 ARM_Dynarmic_32::GetTPIDR_EL0() const { 163u64 ARM_Dynarmic_32::GetTPIDR_EL0() const {
165 return cb->tpidr_el0; 164 return cp15->uprw;
166} 165}
167 166
168void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) { 167void ARM_Dynarmic_32::SetTPIDR_EL0(u64 value) {
169 cb->tpidr_el0 = value; 168 cp15->uprw = static_cast<u32>(value);
170} 169}
171 170
172void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) { 171void ARM_Dynarmic_32::SaveContext(ThreadContext32& ctx) {
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h
index 8ba9cea8f..e5b92d7bb 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_32.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_32.h
@@ -22,6 +22,7 @@ class Memory;
22namespace Core { 22namespace Core {
23 23
24class DynarmicCallbacks32; 24class DynarmicCallbacks32;
25class DynarmicCP15;
25class DynarmicExclusiveMonitor; 26class DynarmicExclusiveMonitor;
26class System; 27class System;
27 28
@@ -66,12 +67,14 @@ private:
66 std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>; 67 std::unordered_map<JitCacheKey, std::shared_ptr<Dynarmic::A32::Jit>, Common::PairHash>;
67 68
68 friend class DynarmicCallbacks32; 69 friend class DynarmicCallbacks32;
70 friend class DynarmicCP15;
71
69 std::unique_ptr<DynarmicCallbacks32> cb; 72 std::unique_ptr<DynarmicCallbacks32> cb;
70 JitCacheType jit_cache; 73 JitCacheType jit_cache;
71 std::shared_ptr<Dynarmic::A32::Jit> jit; 74 std::shared_ptr<Dynarmic::A32::Jit> jit;
75 std::shared_ptr<DynarmicCP15> cp15;
72 std::size_t core_index; 76 std::size_t core_index;
73 DynarmicExclusiveMonitor& exclusive_monitor; 77 DynarmicExclusiveMonitor& exclusive_monitor;
74 std::array<u32, 84> CP15_regs{};
75}; 78};
76 79
77} // namespace Core 80} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
index 337b97be9..5f5e36d94 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp
@@ -98,8 +98,8 @@ public:
98 } 98 }
99 [[fallthrough]]; 99 [[fallthrough]];
100 default: 100 default:
101 ASSERT_MSG(false, "ExceptionRaised(exception = {}, pc = {:X})", 101 ASSERT_MSG(false, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
102 static_cast<std::size_t>(exception), pc); 102 static_cast<std::size_t>(exception), pc, MemoryReadCode(pc));
103 } 103 }
104 } 104 }
105 105
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
index 3fdcdebde..d43e4dd70 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.cpp
@@ -2,79 +2,132 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <fmt/format.h>
6#include "common/logging/log.h"
7#include "core/arm/dynarmic/arm_dynarmic_32.h"
5#include "core/arm/dynarmic/arm_dynarmic_cp15.h" 8#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
9#include "core/core.h"
10#include "core/core_timing.h"
11#include "core/core_timing_util.h"
6 12
7using Callback = Dynarmic::A32::Coprocessor::Callback; 13using Callback = Dynarmic::A32::Coprocessor::Callback;
8using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord; 14using CallbackOrAccessOneWord = Dynarmic::A32::Coprocessor::CallbackOrAccessOneWord;
9using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords; 15using CallbackOrAccessTwoWords = Dynarmic::A32::Coprocessor::CallbackOrAccessTwoWords;
10 16
17template <>
18struct fmt::formatter<Dynarmic::A32::CoprocReg> {
19 constexpr auto parse(format_parse_context& ctx) {
20 return ctx.begin();
21 }
22 template <typename FormatContext>
23 auto format(const Dynarmic::A32::CoprocReg& reg, FormatContext& ctx) {
24 return format_to(ctx.out(), "cp{}", static_cast<size_t>(reg));
25 }
26};
27
28namespace Core {
29
30static u32 dummy_value;
31
11std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1, 32std::optional<Callback> DynarmicCP15::CompileInternalOperation(bool two, unsigned opc1,
12 CoprocReg CRd, CoprocReg CRn, 33 CoprocReg CRd, CoprocReg CRn,
13 CoprocReg CRm, unsigned opc2) { 34 CoprocReg CRm, unsigned opc2) {
35 LOG_CRITICAL(Core_ARM, "CP15: cdp{} p15, {}, {}, {}, {}, {}", two ? "2" : "", opc1, CRd, CRn,
36 CRm, opc2);
14 return {}; 37 return {};
15} 38}
16 39
17CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn, 40CallbackOrAccessOneWord DynarmicCP15::CompileSendOneWord(bool two, unsigned opc1, CoprocReg CRn,
18 CoprocReg CRm, unsigned opc2) { 41 CoprocReg CRm, unsigned opc2) {
19 // TODO(merry): Privileged CP15 registers
20
21 if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) { 42 if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C5 && opc2 == 4) {
43 // CP15_FLUSH_PREFETCH_BUFFER
22 // This is a dummy write, we ignore the value written here. 44 // This is a dummy write, we ignore the value written here.
23 return &CP15[static_cast<std::size_t>(CP15Register::CP15_FLUSH_PREFETCH_BUFFER)]; 45 return &dummy_value;
24 } 46 }
25 47
26 if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) { 48 if (!two && CRn == CoprocReg::C7 && opc1 == 0 && CRm == CoprocReg::C10) {
27 switch (opc2) { 49 switch (opc2) {
28 case 4: 50 case 4:
51 // CP15_DATA_SYNC_BARRIER
29 // This is a dummy write, we ignore the value written here. 52 // This is a dummy write, we ignore the value written here.
30 return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_SYNC_BARRIER)]; 53 return &dummy_value;
31 case 5: 54 case 5:
55 // CP15_DATA_MEMORY_BARRIER
32 // This is a dummy write, we ignore the value written here. 56 // This is a dummy write, we ignore the value written here.
33 return &CP15[static_cast<std::size_t>(CP15Register::CP15_DATA_MEMORY_BARRIER)]; 57 return &dummy_value;
34 default:
35 return {};
36 } 58 }
37 } 59 }
38 60
39 if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) { 61 if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0 && opc2 == 2) {
40 return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)]; 62 // CP15_THREAD_UPRW
63 return &uprw;
41 } 64 }
42 65
66 LOG_CRITICAL(Core_ARM, "CP15: mcr{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm,
67 opc2);
43 return {}; 68 return {};
44} 69}
45 70
46CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) { 71CallbackOrAccessTwoWords DynarmicCP15::CompileSendTwoWords(bool two, unsigned opc, CoprocReg CRm) {
72 LOG_CRITICAL(Core_ARM, "CP15: mcrr{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm);
47 return {}; 73 return {};
48} 74}
49 75
50CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn, 76CallbackOrAccessOneWord DynarmicCP15::CompileGetOneWord(bool two, unsigned opc1, CoprocReg CRn,
51 CoprocReg CRm, unsigned opc2) { 77 CoprocReg CRm, unsigned opc2) {
52 // TODO(merry): Privileged CP15 registers
53
54 if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) { 78 if (!two && CRn == CoprocReg::C13 && opc1 == 0 && CRm == CoprocReg::C0) {
55 switch (opc2) { 79 switch (opc2) {
56 case 2: 80 case 2:
57 return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_UPRW)]; 81 // CP15_THREAD_UPRW
82 return &uprw;
58 case 3: 83 case 3:
59 return &CP15[static_cast<std::size_t>(CP15Register::CP15_THREAD_URO)]; 84 // CP15_THREAD_URO
60 default: 85 return &uro;
61 return {};
62 } 86 }
63 } 87 }
64 88
89 LOG_CRITICAL(Core_ARM, "CP15: mrc{} p15, {}, <Rt>, {}, {}, {}", two ? "2" : "", opc1, CRn, CRm,
90 opc2);
65 return {}; 91 return {};
66} 92}
67 93
68CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) { 94CallbackOrAccessTwoWords DynarmicCP15::CompileGetTwoWords(bool two, unsigned opc, CoprocReg CRm) {
95 if (!two && opc == 0 && CRm == CoprocReg::C14) {
96 // CNTPCT
97 const auto callback = static_cast<u64 (*)(Dynarmic::A32::Jit*, void*, u32, u32)>(
98 [](Dynarmic::A32::Jit*, void* arg, u32, u32) -> u64 {
99 ARM_Dynarmic_32& parent = *(ARM_Dynarmic_32*)arg;
100 return Timing::CpuCyclesToClockCycles(parent.system.CoreTiming().GetTicks());
101 });
102 return Dynarmic::A32::Coprocessor::Callback{callback, (void*)&parent};
103 }
104
105 LOG_CRITICAL(Core_ARM, "CP15: mrrc{} p15, {}, <Rt>, <Rt2>, {}", two ? "2" : "", opc, CRm);
69 return {}; 106 return {};
70} 107}
71 108
72std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd, 109std::optional<Callback> DynarmicCP15::CompileLoadWords(bool two, bool long_transfer, CoprocReg CRd,
73 std::optional<u8> option) { 110 std::optional<u8> option) {
111 if (option) {
112 LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "",
113 long_transfer ? "l" : "", CRd, *option);
114 } else {
115 LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "",
116 long_transfer ? "l" : "", CRd);
117 }
74 return {}; 118 return {};
75} 119}
76 120
77std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, 121std::optional<Callback> DynarmicCP15::CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
78 std::optional<u8> option) { 122 std::optional<u8> option) {
123 if (option) {
124 LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...], {}", two ? "2" : "",
125 long_transfer ? "l" : "", CRd, *option);
126 } else {
127 LOG_CRITICAL(Core_ARM, "CP15: mrrc{}{} p15, {}, [...]", two ? "2" : "",
128 long_transfer ? "l" : "", CRd);
129 }
79 return {}; 130 return {};
80} 131}
132
133} // namespace Core
diff --git a/src/core/arm/dynarmic/arm_dynarmic_cp15.h b/src/core/arm/dynarmic/arm_dynarmic_cp15.h
index 07bcde5f9..7356d252e 100644
--- a/src/core/arm/dynarmic/arm_dynarmic_cp15.h
+++ b/src/core/arm/dynarmic/arm_dynarmic_cp15.h
@@ -10,128 +10,15 @@
10#include <dynarmic/A32/coprocessor.h> 10#include <dynarmic/A32/coprocessor.h>
11#include "common/common_types.h" 11#include "common/common_types.h"
12 12
13enum class CP15Register { 13namespace Core {
14 // c0 - Information registers
15 CP15_MAIN_ID,
16 CP15_CACHE_TYPE,
17 CP15_TCM_STATUS,
18 CP15_TLB_TYPE,
19 CP15_CPU_ID,
20 CP15_PROCESSOR_FEATURE_0,
21 CP15_PROCESSOR_FEATURE_1,
22 CP15_DEBUG_FEATURE_0,
23 CP15_AUXILIARY_FEATURE_0,
24 CP15_MEMORY_MODEL_FEATURE_0,
25 CP15_MEMORY_MODEL_FEATURE_1,
26 CP15_MEMORY_MODEL_FEATURE_2,
27 CP15_MEMORY_MODEL_FEATURE_3,
28 CP15_ISA_FEATURE_0,
29 CP15_ISA_FEATURE_1,
30 CP15_ISA_FEATURE_2,
31 CP15_ISA_FEATURE_3,
32 CP15_ISA_FEATURE_4,
33 14
34 // c1 - Control registers 15class ARM_Dynarmic_32;
35 CP15_CONTROL,
36 CP15_AUXILIARY_CONTROL,
37 CP15_COPROCESSOR_ACCESS_CONTROL,
38
39 // c2 - Translation table registers
40 CP15_TRANSLATION_BASE_TABLE_0,
41 CP15_TRANSLATION_BASE_TABLE_1,
42 CP15_TRANSLATION_BASE_CONTROL,
43 CP15_DOMAIN_ACCESS_CONTROL,
44 CP15_RESERVED,
45
46 // c5 - Fault status registers
47 CP15_FAULT_STATUS,
48 CP15_INSTR_FAULT_STATUS,
49 CP15_COMBINED_DATA_FSR = CP15_FAULT_STATUS,
50 CP15_INST_FSR,
51
52 // c6 - Fault Address registers
53 CP15_FAULT_ADDRESS,
54 CP15_COMBINED_DATA_FAR = CP15_FAULT_ADDRESS,
55 CP15_WFAR,
56 CP15_IFAR,
57
58 // c7 - Cache operation registers
59 CP15_WAIT_FOR_INTERRUPT,
60 CP15_PHYS_ADDRESS,
61 CP15_INVALIDATE_INSTR_CACHE,
62 CP15_INVALIDATE_INSTR_CACHE_USING_MVA,
63 CP15_INVALIDATE_INSTR_CACHE_USING_INDEX,
64 CP15_FLUSH_PREFETCH_BUFFER,
65 CP15_FLUSH_BRANCH_TARGET_CACHE,
66 CP15_FLUSH_BRANCH_TARGET_CACHE_ENTRY,
67 CP15_INVALIDATE_DATA_CACHE,
68 CP15_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
69 CP15_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
70 CP15_INVALIDATE_DATA_AND_INSTR_CACHE,
71 CP15_CLEAN_DATA_CACHE,
72 CP15_CLEAN_DATA_CACHE_LINE_USING_MVA,
73 CP15_CLEAN_DATA_CACHE_LINE_USING_INDEX,
74 CP15_DATA_SYNC_BARRIER,
75 CP15_DATA_MEMORY_BARRIER,
76 CP15_CLEAN_AND_INVALIDATE_DATA_CACHE,
77 CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_MVA,
78 CP15_CLEAN_AND_INVALIDATE_DATA_CACHE_LINE_USING_INDEX,
79
80 // c8 - TLB operations
81 CP15_INVALIDATE_ITLB,
82 CP15_INVALIDATE_ITLB_SINGLE_ENTRY,
83 CP15_INVALIDATE_ITLB_ENTRY_ON_ASID_MATCH,
84 CP15_INVALIDATE_ITLB_ENTRY_ON_MVA,
85 CP15_INVALIDATE_DTLB,
86 CP15_INVALIDATE_DTLB_SINGLE_ENTRY,
87 CP15_INVALIDATE_DTLB_ENTRY_ON_ASID_MATCH,
88 CP15_INVALIDATE_DTLB_ENTRY_ON_MVA,
89 CP15_INVALIDATE_UTLB,
90 CP15_INVALIDATE_UTLB_SINGLE_ENTRY,
91 CP15_INVALIDATE_UTLB_ENTRY_ON_ASID_MATCH,
92 CP15_INVALIDATE_UTLB_ENTRY_ON_MVA,
93
94 // c9 - Data cache lockdown register
95 CP15_DATA_CACHE_LOCKDOWN,
96
97 // c10 - TLB/Memory map registers
98 CP15_TLB_LOCKDOWN,
99 CP15_PRIMARY_REGION_REMAP,
100 CP15_NORMAL_REGION_REMAP,
101
102 // c13 - Thread related registers
103 CP15_PID,
104 CP15_CONTEXT_ID,
105 CP15_THREAD_UPRW, // Thread ID register - User/Privileged Read/Write
106 CP15_THREAD_URO, // Thread ID register - User Read Only (Privileged R/W)
107 CP15_THREAD_PRW, // Thread ID register - Privileged R/W only.
108
109 // c15 - Performance and TLB lockdown registers
110 CP15_PERFORMANCE_MONITOR_CONTROL,
111 CP15_CYCLE_COUNTER,
112 CP15_COUNT_0,
113 CP15_COUNT_1,
114 CP15_READ_MAIN_TLB_LOCKDOWN_ENTRY,
115 CP15_WRITE_MAIN_TLB_LOCKDOWN_ENTRY,
116 CP15_MAIN_TLB_LOCKDOWN_VIRT_ADDRESS,
117 CP15_MAIN_TLB_LOCKDOWN_PHYS_ADDRESS,
118 CP15_MAIN_TLB_LOCKDOWN_ATTRIBUTE,
119 CP15_TLB_DEBUG_CONTROL,
120
121 // Skyeye defined
122 CP15_TLB_FAULT_ADDR,
123 CP15_TLB_FAULT_STATUS,
124
125 // Not an actual register.
126 // All registers should be defined above this.
127 CP15_REGISTER_COUNT,
128};
129 16
130class DynarmicCP15 final : public Dynarmic::A32::Coprocessor { 17class DynarmicCP15 final : public Dynarmic::A32::Coprocessor {
131public: 18public:
132 using CoprocReg = Dynarmic::A32::CoprocReg; 19 using CoprocReg = Dynarmic::A32::CoprocReg;
133 20
134 explicit DynarmicCP15(u32* cp15) : CP15(cp15){}; 21 explicit DynarmicCP15(ARM_Dynarmic_32& parent) : parent(parent) {}
135 22
136 std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd, 23 std::optional<Callback> CompileInternalOperation(bool two, unsigned opc1, CoprocReg CRd,
137 CoprocReg CRn, CoprocReg CRm, 24 CoprocReg CRn, CoprocReg CRm,
@@ -147,6 +34,9 @@ public:
147 std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd, 34 std::optional<Callback> CompileStoreWords(bool two, bool long_transfer, CoprocReg CRd,
148 std::optional<u8> option) override; 35 std::optional<u8> option) override;
149 36
150private: 37 ARM_Dynarmic_32& parent;
151 u32* CP15{}; 38 u32 uprw;
39 u32 uro;
152}; 40};
41
42} // namespace Core
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index de50d3b14..be34b26fe 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -49,6 +49,21 @@ s64 nsToCycles(std::chrono::nanoseconds ns) {
49 return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000; 49 return (Hardware::BASE_CLOCK_RATE * ns.count()) / 1000000000;
50} 50}
51 51
52u64 msToClockCycles(std::chrono::milliseconds ns) {
53 const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
54 return Common::Divide128On32(temp, 1000).first;
55}
56
57u64 usToClockCycles(std::chrono::microseconds ns) {
58 const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
59 return Common::Divide128On32(temp, 1000000).first;
60}
61
62u64 nsToClockCycles(std::chrono::nanoseconds ns) {
63 const u128 temp = Common::Multiply64Into128(ns.count(), Hardware::CNTFREQ);
64 return Common::Divide128On32(temp, 1000000000).first;
65}
66
52u64 CpuCyclesToClockCycles(u64 ticks) { 67u64 CpuCyclesToClockCycles(u64 ticks) {
53 const u128 temporal = Common::Multiply64Into128(ticks, Hardware::CNTFREQ); 68 const u128 temporal = Common::Multiply64Into128(ticks, Hardware::CNTFREQ);
54 return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first; 69 return Common::Divide128On32(temporal, static_cast<u32>(Hardware::BASE_CLOCK_RATE)).first;
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index addc72b19..b3c58447d 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -13,6 +13,9 @@ namespace Core::Timing {
13s64 msToCycles(std::chrono::milliseconds ms); 13s64 msToCycles(std::chrono::milliseconds ms);
14s64 usToCycles(std::chrono::microseconds us); 14s64 usToCycles(std::chrono::microseconds us);
15s64 nsToCycles(std::chrono::nanoseconds ns); 15s64 nsToCycles(std::chrono::nanoseconds ns);
16u64 msToClockCycles(std::chrono::milliseconds ns);
17u64 usToClockCycles(std::chrono::microseconds ns);
18u64 nsToClockCycles(std::chrono::nanoseconds ns);
16 19
17inline std::chrono::milliseconds CyclesToMs(s64 cycles) { 20inline std::chrono::milliseconds CyclesToMs(s64 cycles) {
18 return std::chrono::milliseconds(cycles * 1000 / Hardware::BASE_CLOCK_RATE); 21 return std::chrono::milliseconds(cycles * 1000 / Hardware::BASE_CLOCK_RATE);
diff --git a/src/core/file_sys/system_archive/mii_model.cpp b/src/core/file_sys/system_archive/mii_model.cpp
index 6a9add87c..61bb67945 100644
--- a/src/core/file_sys/system_archive/mii_model.cpp
+++ b/src/core/file_sys/system_archive/mii_model.cpp
@@ -40,7 +40,7 @@ VirtualDir MiiModel() {
40 out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::SHAPE_MID.size()>>( 40 out->AddFile(std::make_shared<ArrayVfsFile<MiiModelData::SHAPE_MID.size()>>(
41 MiiModelData::SHAPE_MID, "ShapeMid.dat")); 41 MiiModelData::SHAPE_MID, "ShapeMid.dat"));
42 42
43 return std::move(out); 43 return out;
44} 44}
45 45
46} // namespace FileSys::SystemArchive 46} // namespace FileSys::SystemArchive
diff --git a/src/core/file_sys/system_archive/shared_font.cpp b/src/core/file_sys/system_archive/shared_font.cpp
index 2c05eb42e..c5cdf7d9b 100644
--- a/src/core/file_sys/system_archive/shared_font.cpp
+++ b/src/core/file_sys/system_archive/shared_font.cpp
@@ -23,7 +23,7 @@ VirtualFile PackBFTTF(const std::array<u8, Size>& data, const std::string& name)
23 23
24 std::vector<u8> bfttf(Size + sizeof(u64)); 24 std::vector<u8> bfttf(Size + sizeof(u64));
25 25
26 u64 offset = 0; 26 size_t offset = 0;
27 Service::NS::EncryptSharedFont(vec, bfttf, offset); 27 Service::NS::EncryptSharedFont(vec, bfttf, offset);
28 return std::make_shared<VectorVfsFile>(std::move(bfttf), name); 28 return std::make_shared<VectorVfsFile>(std::move(bfttf), name);
29} 29}
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index ba0eac4c2..0d01a7047 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -282,18 +282,18 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
282} 282}
283 283
284std::vector<u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { 284std::vector<u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const {
285 std::vector<u8> buffer; 285 std::vector<u8> buffer{};
286 const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && 286 const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
287 BufferDescriptorA()[buffer_index].Size()}; 287 BufferDescriptorA()[buffer_index].Size()};
288 288
289 if (is_buffer_a) { 289 if (is_buffer_a) {
290 ASSERT_MSG(BufferDescriptorA().size() > buffer_index, 290 ASSERT_OR_EXECUTE_MSG(BufferDescriptorA().size() > buffer_index, { return buffer; },
291 "BufferDescriptorA invalid buffer_index {}", buffer_index); 291 "BufferDescriptorA invalid buffer_index {}", buffer_index);
292 buffer.resize(BufferDescriptorA()[buffer_index].Size()); 292 buffer.resize(BufferDescriptorA()[buffer_index].Size());
293 memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), buffer.data(), buffer.size()); 293 memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), buffer.data(), buffer.size());
294 } else { 294 } else {
295 ASSERT_MSG(BufferDescriptorX().size() > buffer_index, 295 ASSERT_OR_EXECUTE_MSG(BufferDescriptorX().size() > buffer_index, { return buffer; },
296 "BufferDescriptorX invalid buffer_index {}", buffer_index); 296 "BufferDescriptorX invalid buffer_index {}", buffer_index);
297 buffer.resize(BufferDescriptorX()[buffer_index].Size()); 297 buffer.resize(BufferDescriptorX()[buffer_index].Size());
298 memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), buffer.data(), buffer.size()); 298 memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), buffer.data(), buffer.size());
299 } 299 }
@@ -318,16 +318,16 @@ std::size_t HLERequestContext::WriteBuffer(const void* buffer, std::size_t size,
318 } 318 }
319 319
320 if (is_buffer_b) { 320 if (is_buffer_b) {
321 ASSERT_MSG(BufferDescriptorB().size() > buffer_index, 321 ASSERT_OR_EXECUTE_MSG(BufferDescriptorB().size() > buffer_index &&
322 "BufferDescriptorB invalid buffer_index {}", buffer_index); 322 BufferDescriptorB()[buffer_index].Size() >= size,
323 ASSERT_MSG(BufferDescriptorB()[buffer_index].Size() >= size, 323 { return 0; }, "BufferDescriptorB is invalid, index={}, size={}",
324 "BufferDescriptorB buffer_index {} is not large enough", buffer_index); 324 buffer_index, size);
325 memory.WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size); 325 memory.WriteBlock(BufferDescriptorB()[buffer_index].Address(), buffer, size);
326 } else { 326 } else {
327 ASSERT_MSG(BufferDescriptorC().size() > buffer_index, 327 ASSERT_OR_EXECUTE_MSG(BufferDescriptorC().size() > buffer_index &&
328 "BufferDescriptorC invalid buffer_index {}", buffer_index); 328 BufferDescriptorC()[buffer_index].Size() >= size,
329 ASSERT_MSG(BufferDescriptorC()[buffer_index].Size() >= size, 329 { return 0; }, "BufferDescriptorC is invalid, index={}, size={}",
330 "BufferDescriptorC buffer_index {} is not large enough", buffer_index); 330 buffer_index, size);
331 memory.WriteBlock(BufferDescriptorC()[buffer_index].Address(), buffer, size); 331 memory.WriteBlock(BufferDescriptorC()[buffer_index].Address(), buffer, size);
332 } 332 }
333 333
@@ -338,16 +338,12 @@ std::size_t HLERequestContext::GetReadBufferSize(std::size_t buffer_index) const
338 const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && 338 const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
339 BufferDescriptorA()[buffer_index].Size()}; 339 BufferDescriptorA()[buffer_index].Size()};
340 if (is_buffer_a) { 340 if (is_buffer_a) {
341 ASSERT_MSG(BufferDescriptorA().size() > buffer_index, 341 ASSERT_OR_EXECUTE_MSG(BufferDescriptorA().size() > buffer_index, { return 0; },
342 "BufferDescriptorA invalid buffer_index {}", buffer_index); 342 "BufferDescriptorA invalid buffer_index {}", buffer_index);
343 ASSERT_MSG(BufferDescriptorA()[buffer_index].Size() > 0,
344 "BufferDescriptorA buffer_index {} is empty", buffer_index);
345 return BufferDescriptorA()[buffer_index].Size(); 343 return BufferDescriptorA()[buffer_index].Size();
346 } else { 344 } else {
347 ASSERT_MSG(BufferDescriptorX().size() > buffer_index, 345 ASSERT_OR_EXECUTE_MSG(BufferDescriptorX().size() > buffer_index, { return 0; },
348 "BufferDescriptorX invalid buffer_index {}", buffer_index); 346 "BufferDescriptorX invalid buffer_index {}", buffer_index);
349 ASSERT_MSG(BufferDescriptorX()[buffer_index].Size() > 0,
350 "BufferDescriptorX buffer_index {} is empty", buffer_index);
351 return BufferDescriptorX()[buffer_index].Size(); 347 return BufferDescriptorX()[buffer_index].Size();
352 } 348 }
353} 349}
@@ -356,14 +352,15 @@ std::size_t HLERequestContext::GetWriteBufferSize(std::size_t buffer_index) cons
356 const bool is_buffer_b{BufferDescriptorB().size() > buffer_index && 352 const bool is_buffer_b{BufferDescriptorB().size() > buffer_index &&
357 BufferDescriptorB()[buffer_index].Size()}; 353 BufferDescriptorB()[buffer_index].Size()};
358 if (is_buffer_b) { 354 if (is_buffer_b) {
359 ASSERT_MSG(BufferDescriptorB().size() > buffer_index, 355 ASSERT_OR_EXECUTE_MSG(BufferDescriptorB().size() > buffer_index, { return 0; },
360 "BufferDescriptorB invalid buffer_index {}", buffer_index); 356 "BufferDescriptorB invalid buffer_index {}", buffer_index);
361 return BufferDescriptorB()[buffer_index].Size(); 357 return BufferDescriptorB()[buffer_index].Size();
362 } else { 358 } else {
363 ASSERT_MSG(BufferDescriptorC().size() > buffer_index, 359 ASSERT_OR_EXECUTE_MSG(BufferDescriptorC().size() > buffer_index, { return 0; },
364 "BufferDescriptorC invalid buffer_index {}", buffer_index); 360 "BufferDescriptorC invalid buffer_index {}", buffer_index);
365 return BufferDescriptorC()[buffer_index].Size(); 361 return BufferDescriptorC()[buffer_index].Size();
366 } 362 }
363 return 0;
367} 364}
368 365
369std::string HLERequestContext::Description() const { 366std::string HLERequestContext::Description() const {
diff --git a/src/core/hle/kernel/memory/memory_manager.cpp b/src/core/hle/kernel/memory/memory_manager.cpp
index 6b432e1b2..acf13585c 100644
--- a/src/core/hle/kernel/memory/memory_manager.cpp
+++ b/src/core/hle/kernel/memory/memory_manager.cpp
@@ -104,7 +104,7 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
104 // Ensure that we don't leave anything un-freed 104 // Ensure that we don't leave anything un-freed
105 auto group_guard = detail::ScopeExit([&] { 105 auto group_guard = detail::ScopeExit([&] {
106 for (const auto& it : page_list.Nodes()) { 106 for (const auto& it : page_list.Nodes()) {
107 const auto min_num_pages{std::min( 107 const auto min_num_pages{std::min<size_t>(
108 it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)}; 108 it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
109 chosen_manager.Free(it.GetAddress(), min_num_pages); 109 chosen_manager.Free(it.GetAddress(), min_num_pages);
110 } 110 }
@@ -139,7 +139,6 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
139 } 139 }
140 140
141 // Only succeed if we allocated as many pages as we wanted 141 // Only succeed if we allocated as many pages as we wanted
142 ASSERT(num_pages >= 0);
143 if (num_pages) { 142 if (num_pages) {
144 return ERR_OUT_OF_MEMORY; 143 return ERR_OUT_OF_MEMORY;
145 } 144 }
@@ -165,7 +164,7 @@ ResultCode MemoryManager::Free(PageLinkedList& page_list, std::size_t num_pages,
165 164
166 // Free all of the pages 165 // Free all of the pages
167 for (const auto& it : page_list.Nodes()) { 166 for (const auto& it : page_list.Nodes()) {
168 const auto min_num_pages{std::min( 167 const auto min_num_pages{std::min<size_t>(
169 it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)}; 168 it.GetNumPages(), (chosen_manager.GetEndAddress() - it.GetAddress()) / PageSize)};
170 chosen_manager.Free(it.GetAddress(), min_num_pages); 169 chosen_manager.Free(it.GetAddress(), min_num_pages);
171 } 170 }
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 36724569f..c4c5199b1 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -132,7 +132,8 @@ std::shared_ptr<ResourceLimit> Process::GetResourceLimit() const {
132 132
133u64 Process::GetTotalPhysicalMemoryAvailable() const { 133u64 Process::GetTotalPhysicalMemoryAvailable() const {
134 const u64 capacity{resource_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory) + 134 const u64 capacity{resource_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory) +
135 page_table->GetTotalHeapSize() + image_size + main_thread_stack_size}; 135 page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size +
136 main_thread_stack_size};
136 137
137 if (capacity < memory_usage_capacity) { 138 if (capacity < memory_usage_capacity) {
138 return capacity; 139 return capacity;
@@ -146,7 +147,8 @@ u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const {
146} 147}
147 148
148u64 Process::GetTotalPhysicalMemoryUsed() const { 149u64 Process::GetTotalPhysicalMemoryUsed() const {
149 return image_size + main_thread_stack_size + page_table->GetTotalHeapSize(); 150 return image_size + main_thread_stack_size + page_table->GetTotalHeapSize() +
151 GetSystemResourceSize();
150} 152}
151 153
152u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const { 154u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const {
diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp
index 00860fcbd..ef5e19e63 100644
--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -38,7 +38,7 @@ void ReadableEvent::Clear() {
38 38
39ResultCode ReadableEvent::Reset() { 39ResultCode ReadableEvent::Reset() {
40 if (!is_signaled) { 40 if (!is_signaled) {
41 LOG_ERROR(Kernel, "Handle is not signaled! object_id={}, object_type={}, object_name={}", 41 LOG_TRACE(Kernel, "Handle is not signaled! object_id={}, object_type={}, object_name={}",
42 GetObjectId(), GetTypeName(), GetName()); 42 GetObjectId(), GetTypeName(), GetName());
43 return ERR_INVALID_STATE; 43 return ERR_INVALID_STATE;
44 } 44 }
diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp
index d9beaa3a4..212e442f4 100644
--- a/src/core/hle/kernel/resource_limit.cpp
+++ b/src/core/hle/kernel/resource_limit.cpp
@@ -24,13 +24,9 @@ bool ResourceLimit::Reserve(ResourceType resource, s64 amount, u64 timeout) {
24 const std::size_t index{ResourceTypeToIndex(resource)}; 24 const std::size_t index{ResourceTypeToIndex(resource)};
25 25
26 s64 new_value = current[index] + amount; 26 s64 new_value = current[index] + amount;
27 while (new_value > limit[index] && available[index] + amount <= limit[index]) { 27 if (new_value > limit[index] && available[index] + amount <= limit[index]) {
28 // TODO(bunnei): This is wrong for multicore, we should wait the calling thread for timeout 28 // TODO(bunnei): This is wrong for multicore, we should wait the calling thread for timeout
29 new_value = current[index] + amount; 29 new_value = current[index] + amount;
30
31 if (timeout >= 0) {
32 break;
33 }
34 } 30 }
35 31
36 if (new_value <= limit[index]) { 32 if (new_value <= limit[index]) {
diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp
index 630a8b048..94d8c1fc6 100644
--- a/src/core/hle/service/acc/acc.cpp
+++ b/src/core/hle/service/acc/acc.cpp
@@ -44,6 +44,218 @@ static constexpr u32 SanitizeJPEGSize(std::size_t size) {
44 return static_cast<u32>(std::min(size, max_jpeg_image_size)); 44 return static_cast<u32>(std::min(size, max_jpeg_image_size));
45} 45}
46 46
47class IManagerForSystemService final : public ServiceFramework<IManagerForSystemService> {
48public:
49 explicit IManagerForSystemService(Common::UUID user_id)
50 : ServiceFramework("IManagerForSystemService") {
51 // clang-format off
52 static const FunctionInfo functions[] = {
53 {0, nullptr, "CheckAvailability"},
54 {1, nullptr, "GetAccountId"},
55 {2, nullptr, "EnsureIdTokenCacheAsync"},
56 {3, nullptr, "LoadIdTokenCache"},
57 {100, nullptr, "SetSystemProgramIdentification"},
58 {101, nullptr, "RefreshNotificationTokenAsync"}, // 7.0.0+
59 {110, nullptr, "GetServiceEntryRequirementCache"}, // 4.0.0+
60 {111, nullptr, "InvalidateServiceEntryRequirementCache"}, // 4.0.0+
61 {112, nullptr, "InvalidateTokenCache"}, // 4.0.0 - 6.2.0
62 {113, nullptr, "GetServiceEntryRequirementCacheForOnlinePlay"}, // 6.1.0+
63 {120, nullptr, "GetNintendoAccountId"},
64 {121, nullptr, "CalculateNintendoAccountAuthenticationFingerprint"}, // 9.0.0+
65 {130, nullptr, "GetNintendoAccountUserResourceCache"},
66 {131, nullptr, "RefreshNintendoAccountUserResourceCacheAsync"},
67 {132, nullptr, "RefreshNintendoAccountUserResourceCacheAsyncIfSecondsElapsed"},
68 {133, nullptr, "GetNintendoAccountVerificationUrlCache"}, // 9.0.0+
69 {134, nullptr, "RefreshNintendoAccountVerificationUrlCache"}, // 9.0.0+
70 {135, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsyncIfSecondsElapsed"}, // 9.0.0+
71 {140, nullptr, "GetNetworkServiceLicenseCache"}, // 5.0.0+
72 {141, nullptr, "RefreshNetworkServiceLicenseCacheAsync"}, // 5.0.0+
73 {142, nullptr, "RefreshNetworkServiceLicenseCacheAsyncIfSecondsElapsed"}, // 5.0.0+
74 {150, nullptr, "CreateAuthorizationRequest"},
75 };
76 // clang-format on
77
78 RegisterHandlers(functions);
79 }
80};
81
82// 3.0.0+
83class IFloatingRegistrationRequest final : public ServiceFramework<IFloatingRegistrationRequest> {
84public:
85 explicit IFloatingRegistrationRequest(Common::UUID user_id)
86 : ServiceFramework("IFloatingRegistrationRequest") {
87 // clang-format off
88 static const FunctionInfo functions[] = {
89 {0, nullptr, "GetSessionId"},
90 {12, nullptr, "GetAccountId"},
91 {13, nullptr, "GetLinkedNintendoAccountId"},
92 {14, nullptr, "GetNickname"},
93 {15, nullptr, "GetProfileImage"},
94 {21, nullptr, "LoadIdTokenCache"},
95 {100, nullptr, "RegisterUser"}, // [1.0.0-3.0.2] RegisterAsync
96 {101, nullptr, "RegisterUserWithUid"}, // [1.0.0-3.0.2] RegisterWithUidAsync
97 {102, nullptr, "RegisterNetworkServiceAccountAsync"}, // 4.0.0+
98 {103, nullptr, "RegisterNetworkServiceAccountWithUidAsync"}, // 4.0.0+
99 {110, nullptr, "SetSystemProgramIdentification"},
100 {111, nullptr, "EnsureIdTokenCacheAsync"},
101 };
102 // clang-format on
103
104 RegisterHandlers(functions);
105 }
106};
107
108class IAdministrator final : public ServiceFramework<IAdministrator> {
109public:
110 explicit IAdministrator(Common::UUID user_id) : ServiceFramework("IAdministrator") {
111 // clang-format off
112 static const FunctionInfo functions[] = {
113 {0, nullptr, "CheckAvailability"},
114 {1, nullptr, "GetAccountId"},
115 {2, nullptr, "EnsureIdTokenCacheAsync"},
116 {3, nullptr, "LoadIdTokenCache"},
117 {100, nullptr, "SetSystemProgramIdentification"},
118 {101, nullptr, "RefreshNotificationTokenAsync"}, // 7.0.0+
119 {110, nullptr, "GetServiceEntryRequirementCache"}, // 4.0.0+
120 {111, nullptr, "InvalidateServiceEntryRequirementCache"}, // 4.0.0+
121 {112, nullptr, "InvalidateTokenCache"}, // 4.0.0 - 6.2.0
122 {113, nullptr, "GetServiceEntryRequirementCacheForOnlinePlay"}, // 6.1.0+
123 {120, nullptr, "GetNintendoAccountId"},
124 {121, nullptr, "CalculateNintendoAccountAuthenticationFingerprint"}, // 9.0.0+
125 {130, nullptr, "GetNintendoAccountUserResourceCache"},
126 {131, nullptr, "RefreshNintendoAccountUserResourceCacheAsync"},
127 {132, nullptr, "RefreshNintendoAccountUserResourceCacheAsyncIfSecondsElapsed"},
128 {133, nullptr, "GetNintendoAccountVerificationUrlCache"}, // 9.0.0+
129 {134, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsync"}, // 9.0.0+
130 {135, nullptr, "RefreshNintendoAccountVerificationUrlCacheAsyncIfSecondsElapsed"}, // 9.0.0+
131 {140, nullptr, "GetNetworkServiceLicenseCache"}, // 5.0.0+
132 {141, nullptr, "RefreshNetworkServiceLicenseCacheAsync"}, // 5.0.0+
133 {142, nullptr, "RefreshNetworkServiceLicenseCacheAsyncIfSecondsElapsed"}, // 5.0.0+
134 {150, nullptr, "CreateAuthorizationRequest"},
135 {200, nullptr, "IsRegistered"},
136 {201, nullptr, "RegisterAsync"},
137 {202, nullptr, "UnregisterAsync"},
138 {203, nullptr, "DeleteRegistrationInfoLocally"},
139 {220, nullptr, "SynchronizeProfileAsync"},
140 {221, nullptr, "UploadProfileAsync"},
141 {222, nullptr, "SynchronizaProfileAsyncIfSecondsElapsed"},
142 {250, nullptr, "IsLinkedWithNintendoAccount"},
143 {251, nullptr, "CreateProcedureToLinkWithNintendoAccount"},
144 {252, nullptr, "ResumeProcedureToLinkWithNintendoAccount"},
145 {255, nullptr, "CreateProcedureToUpdateLinkageStateOfNintendoAccount"},
146 {256, nullptr, "ResumeProcedureToUpdateLinkageStateOfNintendoAccount"},
147 {260, nullptr, "CreateProcedureToLinkNnidWithNintendoAccount"}, // 3.0.0+
148 {261, nullptr, "ResumeProcedureToLinkNnidWithNintendoAccount"}, // 3.0.0+
149 {280, nullptr, "ProxyProcedureToAcquireApplicationAuthorizationForNintendoAccount"},
150 {290, nullptr, "GetRequestForNintendoAccountUserResourceView"}, // 8.0.0+
151 {300, nullptr, "TryRecoverNintendoAccountUserStateAsync"}, // 6.0.0+
152 {400, nullptr, "IsServiceEntryRequirementCacheRefreshRequiredForOnlinePlay"}, // 6.1.0+
153 {401, nullptr, "RefreshServiceEntryRequirementCacheForOnlinePlayAsync"}, // 6.1.0+
154 {900, nullptr, "GetAuthenticationInfoForWin"}, // 9.0.0+
155 {901, nullptr, "ImportAsyncForWin"}, // 9.0.0+
156 {997, nullptr, "DebugUnlinkNintendoAccountAsync"},
157 {998, nullptr, "DebugSetAvailabilityErrorDetail"},
158 };
159 // clang-format on
160
161 RegisterHandlers(functions);
162 }
163};
164
165class IAuthorizationRequest final : public ServiceFramework<IAuthorizationRequest> {
166public:
167 explicit IAuthorizationRequest(Common::UUID user_id)
168 : ServiceFramework("IAuthorizationRequest") {
169 // clang-format off
170 static const FunctionInfo functions[] = {
171 {0, nullptr, "GetSessionId"},
172 {10, nullptr, "InvokeWithoutInteractionAsync"},
173 {19, nullptr, "IsAuthorized"},
174 {20, nullptr, "GetAuthorizationCode"},
175 {21, nullptr, "GetIdToken"},
176 {22, nullptr, "GetState"},
177 };
178 // clang-format on
179
180 RegisterHandlers(functions);
181 }
182};
183
184class IOAuthProcedure final : public ServiceFramework<IOAuthProcedure> {
185public:
186 explicit IOAuthProcedure(Common::UUID user_id) : ServiceFramework("IOAuthProcedure") {
187 // clang-format off
188 static const FunctionInfo functions[] = {
189 {0, nullptr, "PrepareAsync"},
190 {1, nullptr, "GetRequest"},
191 {2, nullptr, "ApplyResponse"},
192 {3, nullptr, "ApplyResponseAsync"},
193 {10, nullptr, "Suspend"},
194 };
195 // clang-format on
196
197 RegisterHandlers(functions);
198 }
199};
200
201// 3.0.0+
202class IOAuthProcedureForExternalNsa final : public ServiceFramework<IOAuthProcedureForExternalNsa> {
203public:
204 explicit IOAuthProcedureForExternalNsa(Common::UUID user_id)
205 : ServiceFramework("IOAuthProcedureForExternalNsa") {
206 // clang-format off
207 static const FunctionInfo functions[] = {
208 {0, nullptr, "PrepareAsync"},
209 {1, nullptr, "GetRequest"},
210 {2, nullptr, "ApplyResponse"},
211 {3, nullptr, "ApplyResponseAsync"},
212 {10, nullptr, "Suspend"},
213 {100, nullptr, "GetAccountId"},
214 {101, nullptr, "GetLinkedNintendoAccountId"},
215 {102, nullptr, "GetNickname"},
216 {103, nullptr, "GetProfileImage"},
217 };
218 // clang-format on
219
220 RegisterHandlers(functions);
221 }
222};
223
224class IOAuthProcedureForNintendoAccountLinkage final
225 : public ServiceFramework<IOAuthProcedureForNintendoAccountLinkage> {
226public:
227 explicit IOAuthProcedureForNintendoAccountLinkage(Common::UUID user_id)
228 : ServiceFramework("IOAuthProcedureForNintendoAccountLinkage") {
229 // clang-format off
230 static const FunctionInfo functions[] = {
231 {0, nullptr, "PrepareAsync"},
232 {1, nullptr, "GetRequest"},
233 {2, nullptr, "ApplyResponse"},
234 {3, nullptr, "ApplyResponseAsync"},
235 {10, nullptr, "Suspend"},
236 {100, nullptr, "GetRequestWithTheme"},
237 {101, nullptr, "IsNetworkServiceAccountReplaced"},
238 {199, nullptr, "GetUrlForIntroductionOfExtraMembership"}, // 2.0.0 - 5.1.0
239 };
240 // clang-format on
241
242 RegisterHandlers(functions);
243 }
244};
245
246class INotifier final : public ServiceFramework<INotifier> {
247public:
248 explicit INotifier(Common::UUID user_id) : ServiceFramework("INotifier") {
249 // clang-format off
250 static const FunctionInfo functions[] = {
251 {0, nullptr, "GetSystemEvent"},
252 };
253 // clang-format on
254
255 RegisterHandlers(functions);
256 }
257};
258
47class IProfileCommon : public ServiceFramework<IProfileCommon> { 259class IProfileCommon : public ServiceFramework<IProfileCommon> {
48public: 260public:
49 explicit IProfileCommon(const char* name, bool editor_commands, Common::UUID user_id, 261 explicit IProfileCommon(const char* name, bool editor_commands, Common::UUID user_id,
@@ -226,6 +438,54 @@ public:
226 : IProfileCommon("IProfileEditor", true, user_id, profile_manager) {} 438 : IProfileCommon("IProfileEditor", true, user_id, profile_manager) {}
227}; 439};
228 440
441class IAsyncContext final : public ServiceFramework<IAsyncContext> {
442public:
443 explicit IAsyncContext(Common::UUID user_id) : ServiceFramework("IAsyncContext") {
444 // clang-format off
445 static const FunctionInfo functions[] = {
446 {0, nullptr, "GetSystemEvent"},
447 {1, nullptr, "Cancel"},
448 {2, nullptr, "HasDone"},
449 {3, nullptr, "GetResult"},
450 };
451 // clang-format on
452
453 RegisterHandlers(functions);
454 }
455};
456
457class ISessionObject final : public ServiceFramework<ISessionObject> {
458public:
459 explicit ISessionObject(Common::UUID user_id) : ServiceFramework("ISessionObject") {
460 // clang-format off
461 static const FunctionInfo functions[] = {
462 {999, nullptr, "Dummy"},
463 };
464 // clang-format on
465
466 RegisterHandlers(functions);
467 }
468};
469
470class IGuestLoginRequest final : public ServiceFramework<IGuestLoginRequest> {
471public:
472 explicit IGuestLoginRequest(Common::UUID) : ServiceFramework("IGuestLoginRequest") {
473 // clang-format off
474 static const FunctionInfo functions[] = {
475 {0, nullptr, "GetSessionId"},
476 {11, nullptr, "Unknown"}, // 1.0.0 - 2.3.0 (the name is blank on Switchbrew)
477 {12, nullptr, "GetAccountId"},
478 {13, nullptr, "GetLinkedNintendoAccountId"},
479 {14, nullptr, "GetNickname"},
480 {15, nullptr, "GetProfileImage"},
481 {21, nullptr, "LoadIdTokenCache"}, // 3.0.0+
482 };
483 // clang-format on
484
485 RegisterHandlers(functions);
486 }
487};
488
229class IManagerForApplication final : public ServiceFramework<IManagerForApplication> { 489class IManagerForApplication final : public ServiceFramework<IManagerForApplication> {
230public: 490public:
231 explicit IManagerForApplication(Common::UUID user_id) 491 explicit IManagerForApplication(Common::UUID user_id)
@@ -265,6 +525,87 @@ private:
265 Common::UUID user_id; 525 Common::UUID user_id;
266}; 526};
267 527
528// 6.0.0+
529class IAsyncNetworkServiceLicenseKindContext final
530 : public ServiceFramework<IAsyncNetworkServiceLicenseKindContext> {
531public:
532 explicit IAsyncNetworkServiceLicenseKindContext(Common::UUID user_id)
533 : ServiceFramework("IAsyncNetworkServiceLicenseKindContext") {
534 // clang-format off
535 static const FunctionInfo functions[] = {
536 {0, nullptr, "GetSystemEvent"},
537 {1, nullptr, "Cancel"},
538 {2, nullptr, "HasDone"},
539 {3, nullptr, "GetResult"},
540 {4, nullptr, "GetNetworkServiceLicenseKind"},
541 };
542 // clang-format on
543
544 RegisterHandlers(functions);
545 }
546};
547
548// 8.0.0+
549class IOAuthProcedureForUserRegistration final
550 : public ServiceFramework<IOAuthProcedureForUserRegistration> {
551public:
552 explicit IOAuthProcedureForUserRegistration(Common::UUID user_id)
553 : ServiceFramework("IOAuthProcedureForUserRegistration") {
554 // clang-format off
555 static const FunctionInfo functions[] = {
556 {0, nullptr, "PrepareAsync"},
557 {1, nullptr, "GetRequest"},
558 {2, nullptr, "ApplyResponse"},
559 {3, nullptr, "ApplyResponseAsync"},
560 {10, nullptr, "Suspend"},
561 {100, nullptr, "GetAccountId"},
562 {101, nullptr, "GetLinkedNintendoAccountId"},
563 {102, nullptr, "GetNickname"},
564 {103, nullptr, "GetProfileImage"},
565 {110, nullptr, "RegisterUserAsync"},
566 {111, nullptr, "GetUid"},
567 };
568 // clang-format on
569
570 RegisterHandlers(functions);
571 }
572};
573
574class DAUTH_O final : public ServiceFramework<DAUTH_O> {
575public:
576 explicit DAUTH_O(Common::UUID) : ServiceFramework("dauth:o") {
577 // clang-format off
578 static const FunctionInfo functions[] = {
579 {0, nullptr, "EnsureAuthenticationTokenCacheAsync"}, // [5.0.0-5.1.0] GeneratePostData
580 {1, nullptr, "LoadAuthenticationTokenCache"}, // 6.0.0+
581 {2, nullptr, "InvalidateAuthenticationTokenCache"}, // 6.0.0+
582 {10, nullptr, "EnsureEdgeTokenCacheAsync"}, // 6.0.0+
583 {11, nullptr, "LoadEdgeTokenCache"}, // 6.0.0+
584 {12, nullptr, "InvalidateEdgeTokenCache"}, // 6.0.0+
585 };
586 // clang-format on
587
588 RegisterHandlers(functions);
589 }
590};
591
592// 6.0.0+
593class IAsyncResult final : public ServiceFramework<IAsyncResult> {
594public:
595 explicit IAsyncResult(Common::UUID user_id) : ServiceFramework("IAsyncResult") {
596 // clang-format off
597 static const FunctionInfo functions[] = {
598 {0, nullptr, "GetResult"},
599 {1, nullptr, "Cancel"},
600 {2, nullptr, "IsAvailable"},
601 {3, nullptr, "GetSystemEvent"},
602 };
603 // clang-format on
604
605 RegisterHandlers(functions);
606 }
607};
608
268void Module::Interface::GetUserCount(Kernel::HLERequestContext& ctx) { 609void Module::Interface::GetUserCount(Kernel::HLERequestContext& ctx) {
269 LOG_DEBUG(Service_ACC, "called"); 610 LOG_DEBUG(Service_ACC, "called");
270 IPC::ResponseBuilder rb{ctx, 3}; 611 IPC::ResponseBuilder rb{ctx, 3};
diff --git a/src/core/hle/service/acc/acc_aa.cpp b/src/core/hle/service/acc/acc_aa.cpp
index 3bac6bcd1..51f119b12 100644
--- a/src/core/hle/service/acc/acc_aa.cpp
+++ b/src/core/hle/service/acc/acc_aa.cpp
@@ -13,8 +13,8 @@ ACC_AA::ACC_AA(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
13 {0, nullptr, "EnsureCacheAsync"}, 13 {0, nullptr, "EnsureCacheAsync"},
14 {1, nullptr, "LoadCache"}, 14 {1, nullptr, "LoadCache"},
15 {2, nullptr, "GetDeviceAccountId"}, 15 {2, nullptr, "GetDeviceAccountId"},
16 {50, nullptr, "RegisterNotificationTokenAsync"}, 16 {50, nullptr, "RegisterNotificationTokenAsync"}, // 1.0.0 - 6.2.0
17 {51, nullptr, "UnregisterNotificationTokenAsync"}, 17 {51, nullptr, "UnregisterNotificationTokenAsync"}, // 1.0.0 - 6.2.0
18 }; 18 };
19 RegisterHandlers(functions); 19 RegisterHandlers(functions);
20} 20}
diff --git a/src/core/hle/service/acc/acc_su.cpp b/src/core/hle/service/acc/acc_su.cpp
index 2eefc6df5..85620bde3 100644
--- a/src/core/hle/service/acc/acc_su.cpp
+++ b/src/core/hle/service/acc/acc_su.cpp
@@ -17,28 +17,28 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
17 {3, &ACC_SU::ListOpenUsers, "ListOpenUsers"}, 17 {3, &ACC_SU::ListOpenUsers, "ListOpenUsers"},
18 {4, &ACC_SU::GetLastOpenedUser, "GetLastOpenedUser"}, 18 {4, &ACC_SU::GetLastOpenedUser, "GetLastOpenedUser"},
19 {5, &ACC_SU::GetProfile, "GetProfile"}, 19 {5, &ACC_SU::GetProfile, "GetProfile"},
20 {6, nullptr, "GetProfileDigest"}, 20 {6, nullptr, "GetProfileDigest"}, // 3.0.0+
21 {50, &ACC_SU::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"}, 21 {50, &ACC_SU::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
22 {51, &ACC_SU::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, 22 {51, &ACC_SU::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
23 {60, nullptr, "ListOpenContextStoredUsers"}, 23 {60, nullptr, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0
24 {99, nullptr, "DebugActivateOpenContextRetention"}, 24 {99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+
25 {100, nullptr, "GetUserRegistrationNotifier"}, 25 {100, nullptr, "GetUserRegistrationNotifier"},
26 {101, nullptr, "GetUserStateChangeNotifier"}, 26 {101, nullptr, "GetUserStateChangeNotifier"},
27 {102, nullptr, "GetBaasAccountManagerForSystemService"}, 27 {102, nullptr, "GetBaasAccountManagerForSystemService"},
28 {103, nullptr, "GetBaasUserAvailabilityChangeNotifier"}, 28 {103, nullptr, "GetBaasUserAvailabilityChangeNotifier"},
29 {104, nullptr, "GetProfileUpdateNotifier"}, 29 {104, nullptr, "GetProfileUpdateNotifier"},
30 {105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, 30 {105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+
31 {106, nullptr, "GetProfileSyncNotifier"}, 31 {106, nullptr, "GetProfileSyncNotifier"}, // 9.0.0+
32 {110, nullptr, "StoreSaveDataThumbnail"}, 32 {110, nullptr, "StoreSaveDataThumbnail"},
33 {111, nullptr, "ClearSaveDataThumbnail"}, 33 {111, nullptr, "ClearSaveDataThumbnail"},
34 {112, nullptr, "LoadSaveDataThumbnail"}, 34 {112, nullptr, "LoadSaveDataThumbnail"},
35 {113, nullptr, "GetSaveDataThumbnailExistence"}, 35 {113, nullptr, "GetSaveDataThumbnailExistence"}, // 5.0.0+
36 {120, nullptr, "ListOpenUsersInApplication"}, 36 {120, nullptr, "ListOpenUsersInApplication"}, // 10.0.0+
37 {130, nullptr, "ActivateOpenContextRetention"}, 37 {130, nullptr, "ActivateOpenContextRetention"}, // 6.0.0+
38 {140, &ACC_SU::ListQualifiedUsers, "ListQualifiedUsers"}, 38 {140, &ACC_SU::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+
39 {150, nullptr, "AuthenticateApplicationAsync"}, 39 {150, nullptr, "AuthenticateApplicationAsync"}, // 10.0.0+
40 {190, nullptr, "GetUserLastOpenedApplication"}, 40 {190, nullptr, "GetUserLastOpenedApplication"}, // 1.0.0 - 9.2.0
41 {191, nullptr, "ActivateOpenContextHolder"}, 41 {191, nullptr, "ActivateOpenContextHolder"}, // 7.0.0+
42 {200, nullptr, "BeginUserRegistration"}, 42 {200, nullptr, "BeginUserRegistration"},
43 {201, nullptr, "CompleteUserRegistration"}, 43 {201, nullptr, "CompleteUserRegistration"},
44 {202, nullptr, "CancelUserRegistration"}, 44 {202, nullptr, "CancelUserRegistration"},
@@ -46,15 +46,15 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
46 {204, nullptr, "SetUserPosition"}, 46 {204, nullptr, "SetUserPosition"},
47 {205, &ACC_SU::GetProfileEditor, "GetProfileEditor"}, 47 {205, &ACC_SU::GetProfileEditor, "GetProfileEditor"},
48 {206, nullptr, "CompleteUserRegistrationForcibly"}, 48 {206, nullptr, "CompleteUserRegistrationForcibly"},
49 {210, nullptr, "CreateFloatingRegistrationRequest"}, 49 {210, nullptr, "CreateFloatingRegistrationRequest"}, // 3.0.0+
50 {211, nullptr, "CreateProcedureToRegisterUserWithNintendoAccount"}, 50 {211, nullptr, "CreateProcedureToRegisterUserWithNintendoAccount"}, // 8.0.0+
51 {212, nullptr, "ResumeProcedureToRegisterUserWithNintendoAccount"}, 51 {212, nullptr, "ResumeProcedureToRegisterUserWithNintendoAccount"}, // 8.0.0+
52 {230, nullptr, "AuthenticateServiceAsync"}, 52 {230, nullptr, "AuthenticateServiceAsync"},
53 {250, nullptr, "GetBaasAccountAdministrator"}, 53 {250, nullptr, "GetBaasAccountAdministrator"},
54 {290, nullptr, "ProxyProcedureForGuestLoginWithNintendoAccount"}, 54 {290, nullptr, "ProxyProcedureForGuestLoginWithNintendoAccount"},
55 {291, nullptr, "ProxyProcedureForFloatingRegistrationWithNintendoAccount"}, 55 {291, nullptr, "ProxyProcedureForFloatingRegistrationWithNintendoAccount"}, // 3.0.0+
56 {299, nullptr, "SuspendBackgroundDaemon"}, 56 {299, nullptr, "SuspendBackgroundDaemon"},
57 {997, nullptr, "DebugInvalidateTokenCacheForUser"}, 57 {997, nullptr, "DebugInvalidateTokenCacheForUser"}, // 3.0.0+
58 {998, nullptr, "DebugSetUserStateClose"}, 58 {998, nullptr, "DebugSetUserStateClose"},
59 {999, nullptr, "DebugSetUserStateOpen"}, 59 {999, nullptr, "DebugSetUserStateOpen"},
60 }; 60 };
diff --git a/src/core/hle/service/acc/acc_u0.cpp b/src/core/hle/service/acc/acc_u0.cpp
index fb4e7e772..49f6e20f1 100644
--- a/src/core/hle/service/acc/acc_u0.cpp
+++ b/src/core/hle/service/acc/acc_u0.cpp
@@ -17,23 +17,23 @@ ACC_U0::ACC_U0(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
17 {3, &ACC_U0::ListOpenUsers, "ListOpenUsers"}, 17 {3, &ACC_U0::ListOpenUsers, "ListOpenUsers"},
18 {4, &ACC_U0::GetLastOpenedUser, "GetLastOpenedUser"}, 18 {4, &ACC_U0::GetLastOpenedUser, "GetLastOpenedUser"},
19 {5, &ACC_U0::GetProfile, "GetProfile"}, 19 {5, &ACC_U0::GetProfile, "GetProfile"},
20 {6, nullptr, "GetProfileDigest"}, 20 {6, nullptr, "GetProfileDigest"}, // 3.0.0+
21 {50, &ACC_U0::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"}, 21 {50, &ACC_U0::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
22 {51, &ACC_U0::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, 22 {51, &ACC_U0::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
23 {60, nullptr, "ListOpenContextStoredUsers"}, 23 {60, nullptr, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0
24 {99, nullptr, "DebugActivateOpenContextRetention"}, 24 {99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+
25 {100, &ACC_U0::InitializeApplicationInfo, "InitializeApplicationInfo"}, 25 {100, &ACC_U0::InitializeApplicationInfo, "InitializeApplicationInfo"},
26 {101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"}, 26 {101, &ACC_U0::GetBaasAccountManagerForApplication, "GetBaasAccountManagerForApplication"},
27 {102, nullptr, "AuthenticateApplicationAsync"}, 27 {102, nullptr, "AuthenticateApplicationAsync"},
28 {103, nullptr, "CheckNetworkServiceAvailabilityAsync"}, 28 {103, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+
29 {110, nullptr, "StoreSaveDataThumbnail"}, 29 {110, nullptr, "StoreSaveDataThumbnail"},
30 {111, nullptr, "ClearSaveDataThumbnail"}, 30 {111, nullptr, "ClearSaveDataThumbnail"},
31 {120, nullptr, "CreateGuestLoginRequest"}, 31 {120, nullptr, "CreateGuestLoginRequest"},
32 {130, nullptr, "LoadOpenContext"}, 32 {130, nullptr, "LoadOpenContext"}, // 5.0.0+
33 {131, nullptr, "ListOpenContextStoredUsers"}, 33 {131, nullptr, "ListOpenContextStoredUsers"}, // 6.0.0+
34 {140, &ACC_U0::InitializeApplicationInfoRestricted, "InitializeApplicationInfoRestricted"}, 34 {140, &ACC_U0::InitializeApplicationInfoRestricted, "InitializeApplicationInfoRestricted"}, // 6.0.0+
35 {141, &ACC_U0::ListQualifiedUsers, "ListQualifiedUsers"}, 35 {141, &ACC_U0::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+
36 {150, &ACC_U0::IsUserAccountSwitchLocked, "IsUserAccountSwitchLocked"}, 36 {150, &ACC_U0::IsUserAccountSwitchLocked, "IsUserAccountSwitchLocked"}, // 6.0.0+
37 }; 37 };
38 // clang-format on 38 // clang-format on
39 39
diff --git a/src/core/hle/service/acc/acc_u1.cpp b/src/core/hle/service/acc/acc_u1.cpp
index 9f29cdc82..f47004f84 100644
--- a/src/core/hle/service/acc/acc_u1.cpp
+++ b/src/core/hle/service/acc/acc_u1.cpp
@@ -17,28 +17,29 @@ ACC_U1::ACC_U1(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
17 {3, &ACC_U1::ListOpenUsers, "ListOpenUsers"}, 17 {3, &ACC_U1::ListOpenUsers, "ListOpenUsers"},
18 {4, &ACC_U1::GetLastOpenedUser, "GetLastOpenedUser"}, 18 {4, &ACC_U1::GetLastOpenedUser, "GetLastOpenedUser"},
19 {5, &ACC_U1::GetProfile, "GetProfile"}, 19 {5, &ACC_U1::GetProfile, "GetProfile"},
20 {6, nullptr, "GetProfileDigest"}, 20 {6, nullptr, "GetProfileDigest"}, // 3.0.0+
21 {50, &ACC_U1::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"}, 21 {50, &ACC_U1::IsUserRegistrationRequestPermitted, "IsUserRegistrationRequestPermitted"},
22 {51, &ACC_U1::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"}, 22 {51, &ACC_U1::TrySelectUserWithoutInteraction, "TrySelectUserWithoutInteraction"},
23 {60, nullptr, "ListOpenContextStoredUsers"}, 23 {60, nullptr, "ListOpenContextStoredUsers"}, // 5.0.0 - 5.1.0
24 {99, nullptr, "DebugActivateOpenContextRetention"}, 24 {99, nullptr, "DebugActivateOpenContextRetention"}, // 6.0.0+
25 {100, nullptr, "GetUserRegistrationNotifier"}, 25 {100, nullptr, "GetUserRegistrationNotifier"},
26 {101, nullptr, "GetUserStateChangeNotifier"}, 26 {101, nullptr, "GetUserStateChangeNotifier"},
27 {102, nullptr, "GetBaasAccountManagerForSystemService"}, 27 {102, nullptr, "GetBaasAccountManagerForSystemService"},
28 {103, nullptr, "GetProfileUpdateNotifier"}, 28 {103, nullptr, "GetBaasUserAvailabilityChangeNotifier"},
29 {104, nullptr, "CheckNetworkServiceAvailabilityAsync"}, 29 {104, nullptr, "GetProfileUpdateNotifier"},
30 {105, nullptr, "GetBaasUserAvailabilityChangeNotifier"}, 30 {105, nullptr, "CheckNetworkServiceAvailabilityAsync"}, // 4.0.0+
31 {106, nullptr, "GetProfileSyncNotifier"}, 31 {106, nullptr, "GetProfileSyncNotifier"}, // 9.0.0+
32 {110, nullptr, "StoreSaveDataThumbnail"}, 32 {110, nullptr, "StoreSaveDataThumbnail"},
33 {111, nullptr, "ClearSaveDataThumbnail"}, 33 {111, nullptr, "ClearSaveDataThumbnail"},
34 {112, nullptr, "LoadSaveDataThumbnail"}, 34 {112, nullptr, "LoadSaveDataThumbnail"},
35 {113, nullptr, "GetSaveDataThumbnailExistence"}, 35 {113, nullptr, "GetSaveDataThumbnailExistence"}, // 5.0.0+
36 {130, nullptr, "ActivateOpenContextRetention"}, 36 {120, nullptr, "ListOpenUsersInApplication"}, // 10.0.0+
37 {140, &ACC_U1::ListQualifiedUsers, "ListQualifiedUsers"}, 37 {130, nullptr, "ActivateOpenContextRetention"}, // 6.0.0+
38 {150, nullptr, "AuthenticateApplicationAsync"}, 38 {140, &ACC_U1::ListQualifiedUsers, "ListQualifiedUsers"}, // 6.0.0+
39 {190, nullptr, "GetUserLastOpenedApplication"}, 39 {150, nullptr, "AuthenticateApplicationAsync"}, // 10.0.0+
40 {191, nullptr, "ActivateOpenContextHolder"}, 40 {190, nullptr, "GetUserLastOpenedApplication"}, // 1.0.0 - 9.2.0
41 {997, nullptr, "DebugInvalidateTokenCacheForUser"}, 41 {191, nullptr, "ActivateOpenContextHolder"}, // 7.0.0+
42 {997, nullptr, "DebugInvalidateTokenCacheForUser"}, // 3.0.0+
42 {998, nullptr, "DebugSetUserStateClose"}, 43 {998, nullptr, "DebugSetUserStateClose"},
43 {999, nullptr, "DebugSetUserStateOpen"}, 44 {999, nullptr, "DebugSetUserStateOpen"},
44 }; 45 };
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 4df74c4f9..20f366635 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -68,6 +68,7 @@ IWindowController::IWindowController(Core::System& system_)
68 static const FunctionInfo functions[] = { 68 static const FunctionInfo functions[] = {
69 {0, nullptr, "CreateWindow"}, 69 {0, nullptr, "CreateWindow"},
70 {1, &IWindowController::GetAppletResourceUserId, "GetAppletResourceUserId"}, 70 {1, &IWindowController::GetAppletResourceUserId, "GetAppletResourceUserId"},
71 {2, nullptr, "GetAppletResourceUserIdOfCallerApplet"},
71 {10, &IWindowController::AcquireForegroundRights, "AcquireForegroundRights"}, 72 {10, &IWindowController::AcquireForegroundRights, "AcquireForegroundRights"},
72 {11, nullptr, "ReleaseForegroundRights"}, 73 {11, nullptr, "ReleaseForegroundRights"},
73 {12, nullptr, "RejectToChangeIntoBackground"}, 74 {12, nullptr, "RejectToChangeIntoBackground"},
@@ -189,8 +190,8 @@ IDisplayController::IDisplayController() : ServiceFramework("IDisplayController"
189 {5, nullptr, "GetLastForegroundCaptureImageEx"}, 190 {5, nullptr, "GetLastForegroundCaptureImageEx"},
190 {6, nullptr, "GetLastApplicationCaptureImageEx"}, 191 {6, nullptr, "GetLastApplicationCaptureImageEx"},
191 {7, nullptr, "GetCallerAppletCaptureImageEx"}, 192 {7, nullptr, "GetCallerAppletCaptureImageEx"},
192 {8, nullptr, "TakeScreenShotOfOwnLayer"}, // 2.0.0+ 193 {8, nullptr, "TakeScreenShotOfOwnLayer"},
193 {9, nullptr, "CopyBetweenCaptureBuffers"}, // 5.0.0+ 194 {9, nullptr, "CopyBetweenCaptureBuffers"},
194 {10, nullptr, "AcquireLastApplicationCaptureBuffer"}, 195 {10, nullptr, "AcquireLastApplicationCaptureBuffer"},
195 {11, nullptr, "ReleaseLastApplicationCaptureBuffer"}, 196 {11, nullptr, "ReleaseLastApplicationCaptureBuffer"},
196 {12, nullptr, "AcquireLastForegroundCaptureBuffer"}, 197 {12, nullptr, "AcquireLastForegroundCaptureBuffer"},
@@ -200,17 +201,14 @@ IDisplayController::IDisplayController() : ServiceFramework("IDisplayController"
200 {16, nullptr, "AcquireLastApplicationCaptureBufferEx"}, 201 {16, nullptr, "AcquireLastApplicationCaptureBufferEx"},
201 {17, nullptr, "AcquireLastForegroundCaptureBufferEx"}, 202 {17, nullptr, "AcquireLastForegroundCaptureBufferEx"},
202 {18, nullptr, "AcquireCallerAppletCaptureBufferEx"}, 203 {18, nullptr, "AcquireCallerAppletCaptureBufferEx"},
203 // 2.0.0+
204 {20, nullptr, "ClearCaptureBuffer"}, 204 {20, nullptr, "ClearCaptureBuffer"},
205 {21, nullptr, "ClearAppletTransitionBuffer"}, 205 {21, nullptr, "ClearAppletTransitionBuffer"},
206 // 4.0.0+
207 {22, nullptr, "AcquireLastApplicationCaptureSharedBuffer"}, 206 {22, nullptr, "AcquireLastApplicationCaptureSharedBuffer"},
208 {23, nullptr, "ReleaseLastApplicationCaptureSharedBuffer"}, 207 {23, nullptr, "ReleaseLastApplicationCaptureSharedBuffer"},
209 {24, nullptr, "AcquireLastForegroundCaptureSharedBuffer"}, 208 {24, nullptr, "AcquireLastForegroundCaptureSharedBuffer"},
210 {25, nullptr, "ReleaseLastForegroundCaptureSharedBuffer"}, 209 {25, nullptr, "ReleaseLastForegroundCaptureSharedBuffer"},
211 {26, nullptr, "AcquireCallerAppletCaptureSharedBuffer"}, 210 {26, nullptr, "AcquireCallerAppletCaptureSharedBuffer"},
212 {27, nullptr, "ReleaseCallerAppletCaptureSharedBuffer"}, 211 {27, nullptr, "ReleaseCallerAppletCaptureSharedBuffer"},
213 // 6.0.0+
214 {28, nullptr, "TakeScreenShotOfOwnLayerEx"}, 212 {28, nullptr, "TakeScreenShotOfOwnLayerEx"},
215 }; 213 };
216 // clang-format on 214 // clang-format on
@@ -225,7 +223,7 @@ IDebugFunctions::IDebugFunctions() : ServiceFramework{"IDebugFunctions"} {
225 static const FunctionInfo functions[] = { 223 static const FunctionInfo functions[] = {
226 {0, nullptr, "NotifyMessageToHomeMenuForDebug"}, 224 {0, nullptr, "NotifyMessageToHomeMenuForDebug"},
227 {1, nullptr, "OpenMainApplication"}, 225 {1, nullptr, "OpenMainApplication"},
228 {10, nullptr, "EmulateButtonEvent"}, 226 {10, nullptr, "PerformSystemButtonPressing"},
229 {20, nullptr, "InvalidateTransitionLayer"}, 227 {20, nullptr, "InvalidateTransitionLayer"},
230 {30, nullptr, "RequestLaunchApplicationWithUserAndArgumentForDebug"}, 228 {30, nullptr, "RequestLaunchApplicationWithUserAndArgumentForDebug"},
231 {40, nullptr, "GetAppletResourceUsageInfo"}, 229 {40, nullptr, "GetAppletResourceUsageInfo"},
@@ -267,7 +265,7 @@ ISelfController::ISelfController(Core::System& system,
267 {16, &ISelfController::SetOutOfFocusSuspendingEnabled, "SetOutOfFocusSuspendingEnabled"}, 265 {16, &ISelfController::SetOutOfFocusSuspendingEnabled, "SetOutOfFocusSuspendingEnabled"},
268 {17, nullptr, "SetControllerFirmwareUpdateSection"}, 266 {17, nullptr, "SetControllerFirmwareUpdateSection"},
269 {18, nullptr, "SetRequiresCaptureButtonShortPressedMessage"}, 267 {18, nullptr, "SetRequiresCaptureButtonShortPressedMessage"},
270 {19, &ISelfController::SetScreenShotImageOrientation, "SetScreenShotImageOrientation"}, 268 {19, &ISelfController::SetAlbumImageOrientation, "SetAlbumImageOrientation"},
271 {20, nullptr, "SetDesirableKeyboardLayout"}, 269 {20, nullptr, "SetDesirableKeyboardLayout"},
272 {40, &ISelfController::CreateManagedDisplayLayer, "CreateManagedDisplayLayer"}, 270 {40, &ISelfController::CreateManagedDisplayLayer, "CreateManagedDisplayLayer"},
273 {41, nullptr, "IsSystemBufferSharingEnabled"}, 271 {41, nullptr, "IsSystemBufferSharingEnabled"},
@@ -443,7 +441,7 @@ void ISelfController::SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext&
443 rb.Push(RESULT_SUCCESS); 441 rb.Push(RESULT_SUCCESS);
444} 442}
445 443
446void ISelfController::SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx) { 444void ISelfController::SetAlbumImageOrientation(Kernel::HLERequestContext& ctx) {
447 LOG_WARNING(Service_AM, "(STUBBED) called"); 445 LOG_WARNING(Service_AM, "(STUBBED) called");
448 446
449 IPC::ResponseBuilder rb{ctx, 2}; 447 IPC::ResponseBuilder rb{ctx, 2};
@@ -607,6 +605,7 @@ ICommonStateGetter::ICommonStateGetter(Core::System& system,
607 {20, nullptr, "PushToGeneralChannel"}, 605 {20, nullptr, "PushToGeneralChannel"},
608 {30, nullptr, "GetHomeButtonReaderLockAccessor"}, 606 {30, nullptr, "GetHomeButtonReaderLockAccessor"},
609 {31, nullptr, "GetReaderLockAccessorEx"}, 607 {31, nullptr, "GetReaderLockAccessorEx"},
608 {32, nullptr, "GetWriterLockAccessorEx"},
610 {40, nullptr, "GetCradleFwVersion"}, 609 {40, nullptr, "GetCradleFwVersion"},
611 {50, &ICommonStateGetter::IsVrModeEnabled, "IsVrModeEnabled"}, 610 {50, &ICommonStateGetter::IsVrModeEnabled, "IsVrModeEnabled"},
612 {51, &ICommonStateGetter::SetVrModeEnabled, "SetVrModeEnabled"}, 611 {51, &ICommonStateGetter::SetVrModeEnabled, "SetVrModeEnabled"},
@@ -1132,6 +1131,7 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
1132 {24, nullptr, "GetLaunchStorageInfoForDebug"}, 1131 {24, nullptr, "GetLaunchStorageInfoForDebug"},
1133 {25, &IApplicationFunctions::ExtendSaveData, "ExtendSaveData"}, 1132 {25, &IApplicationFunctions::ExtendSaveData, "ExtendSaveData"},
1134 {26, &IApplicationFunctions::GetSaveDataSize, "GetSaveDataSize"}, 1133 {26, &IApplicationFunctions::GetSaveDataSize, "GetSaveDataSize"},
1134 {27, nullptr, "CreateCacheStorage"},
1135 {30, &IApplicationFunctions::BeginBlockingHomeButtonShortAndLongPressed, "BeginBlockingHomeButtonShortAndLongPressed"}, 1135 {30, &IApplicationFunctions::BeginBlockingHomeButtonShortAndLongPressed, "BeginBlockingHomeButtonShortAndLongPressed"},
1136 {31, &IApplicationFunctions::EndBlockingHomeButtonShortAndLongPressed, "EndBlockingHomeButtonShortAndLongPressed"}, 1136 {31, &IApplicationFunctions::EndBlockingHomeButtonShortAndLongPressed, "EndBlockingHomeButtonShortAndLongPressed"},
1137 {32, &IApplicationFunctions::BeginBlockingHomeButton, "BeginBlockingHomeButton"}, 1137 {32, &IApplicationFunctions::BeginBlockingHomeButton, "BeginBlockingHomeButton"},
@@ -1157,6 +1157,8 @@ IApplicationFunctions::IApplicationFunctions(Core::System& system_)
1157 {120, nullptr, "ExecuteProgram"}, 1157 {120, nullptr, "ExecuteProgram"},
1158 {121, nullptr, "ClearUserChannel"}, 1158 {121, nullptr, "ClearUserChannel"},
1159 {122, nullptr, "UnpopToUserChannel"}, 1159 {122, nullptr, "UnpopToUserChannel"},
1160 {123, nullptr, "GetPreviousProgramIndex"},
1161 {124, nullptr, "EnableApplicationAllThreadDumpOnCrash"},
1160 {130, &IApplicationFunctions::GetGpuErrorDetectedSystemEvent, "GetGpuErrorDetectedSystemEvent"}, 1162 {130, &IApplicationFunctions::GetGpuErrorDetectedSystemEvent, "GetGpuErrorDetectedSystemEvent"},
1161 {140, &IApplicationFunctions::GetFriendInvitationStorageChannelEvent, "GetFriendInvitationStorageChannelEvent"}, 1163 {140, &IApplicationFunctions::GetFriendInvitationStorageChannelEvent, "GetFriendInvitationStorageChannelEvent"},
1162 {141, nullptr, "TryPopFromFriendInvitationStorageChannel"}, 1164 {141, nullptr, "TryPopFromFriendInvitationStorageChannel"},
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index 469f7f814..2f69466ec 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -138,7 +138,7 @@ private:
138 void SetFocusHandlingMode(Kernel::HLERequestContext& ctx); 138 void SetFocusHandlingMode(Kernel::HLERequestContext& ctx);
139 void SetRestartMessageEnabled(Kernel::HLERequestContext& ctx); 139 void SetRestartMessageEnabled(Kernel::HLERequestContext& ctx);
140 void SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx); 140 void SetOutOfFocusSuspendingEnabled(Kernel::HLERequestContext& ctx);
141 void SetScreenShotImageOrientation(Kernel::HLERequestContext& ctx); 141 void SetAlbumImageOrientation(Kernel::HLERequestContext& ctx);
142 void CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx); 142 void CreateManagedDisplayLayer(Kernel::HLERequestContext& ctx);
143 void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx); 143 void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx);
144 void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); 144 void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx);
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
index 54e63c138..fbe3686ae 100644
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -30,7 +30,7 @@ static Core::Frontend::SoftwareKeyboardParameters ConvertToFrontendParameters(
30 config.sub_text.size()); 30 config.sub_text.size());
31 params.guide_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(config.guide_text.data(), 31 params.guide_text = Common::UTF16StringFromFixedZeroTerminatedBuffer(config.guide_text.data(),
32 config.guide_text.size()); 32 config.guide_text.size());
33 params.initial_text = initial_text; 33 params.initial_text = std::move(initial_text);
34 params.max_length = config.length_limit == 0 ? DEFAULT_MAX_LENGTH : config.length_limit; 34 params.max_length = config.length_limit == 0 ? DEFAULT_MAX_LENGTH : config.length_limit;
35 params.password = static_cast<bool>(config.is_password); 35 params.password = static_cast<bool>(config.is_password);
36 params.cursor_at_beginning = static_cast<bool>(config.initial_cursor_position); 36 params.cursor_at_beginning = static_cast<bool>(config.initial_cursor_position);
@@ -60,7 +60,7 @@ void SoftwareKeyboard::Initialize() {
60 std::memcpy(&config, keyboard_config.data(), sizeof(KeyboardConfig)); 60 std::memcpy(&config, keyboard_config.data(), sizeof(KeyboardConfig));
61 61
62 const auto work_buffer_storage = broker.PopNormalDataToApplet(); 62 const auto work_buffer_storage = broker.PopNormalDataToApplet();
63 ASSERT(work_buffer_storage != nullptr); 63 ASSERT_OR_EXECUTE(work_buffer_storage != nullptr, { return; });
64 const auto& work_buffer = work_buffer_storage->GetData(); 64 const auto& work_buffer = work_buffer_storage->GetData();
65 65
66 if (config.initial_string_size == 0) 66 if (config.initial_string_size == 0)
@@ -109,7 +109,7 @@ void SoftwareKeyboard::Execute() {
109 109
110 const auto parameters = ConvertToFrontendParameters(config, initial_text); 110 const auto parameters = ConvertToFrontendParameters(config, initial_text);
111 111
112 frontend.RequestText([this](std::optional<std::u16string> text) { WriteText(text); }, 112 frontend.RequestText([this](std::optional<std::u16string> text) { WriteText(std::move(text)); },
113 parameters); 113 parameters);
114} 114}
115 115
diff --git a/src/core/hle/service/am/spsm.cpp b/src/core/hle/service/am/spsm.cpp
index 003ee8667..f27729ce7 100644
--- a/src/core/hle/service/am/spsm.cpp
+++ b/src/core/hle/service/am/spsm.cpp
@@ -10,17 +10,17 @@ SPSM::SPSM() : ServiceFramework{"spsm"} {
10 // clang-format off 10 // clang-format off
11 static const FunctionInfo functions[] = { 11 static const FunctionInfo functions[] = {
12 {0, nullptr, "GetState"}, 12 {0, nullptr, "GetState"},
13 {1, nullptr, "SleepSystemAndWaitAwake"}, 13 {1, nullptr, "EnterSleep"},
14 {2, nullptr, "Unknown1"}, 14 {2, nullptr, "GetLastWakeReason"},
15 {3, nullptr, "Unknown2"}, 15 {3, nullptr, "Shutdown"},
16 {4, nullptr, "GetNotificationMessageEventHandle"}, 16 {4, nullptr, "GetNotificationMessageEventHandle"},
17 {5, nullptr, "Unknown3"}, 17 {5, nullptr, "ReceiveNotificationMessage"},
18 {6, nullptr, "Unknown4"}, 18 {6, nullptr, "AnalyzeLogForLastSleepWakeSequence"},
19 {7, nullptr, "Unknown5"}, 19 {7, nullptr, "ResetEventLog"},
20 {8, nullptr, "AnalyzePerformanceLogForLastSleepWakeSequence"}, 20 {8, nullptr, "AnalyzePerformanceLogForLastSleepWakeSequence"},
21 {9, nullptr, "ChangeHomeButtonLongPressingTime"}, 21 {9, nullptr, "ChangeHomeButtonLongPressingTime"},
22 {10, nullptr, "Unknown6"}, 22 {10, nullptr, "PutErrorState"},
23 {11, nullptr, "Unknown7"}, 23 {11, nullptr, "InvalidateCurrentHomeButtonPressing"},
24 }; 24 };
25 // clang-format on 25 // clang-format on
26 26
diff --git a/src/core/hle/service/aoc/aoc_u.cpp b/src/core/hle/service/aoc/aoc_u.cpp
index 4227a4adf..8e79f707b 100644
--- a/src/core/hle/service/aoc/aoc_u.cpp
+++ b/src/core/hle/service/aoc/aoc_u.cpp
@@ -60,6 +60,7 @@ AOC_U::AOC_U(Core::System& system)
60 {6, nullptr, "PrepareAddOnContentByApplicationId"}, 60 {6, nullptr, "PrepareAddOnContentByApplicationId"},
61 {7, &AOC_U::PrepareAddOnContent, "PrepareAddOnContent"}, 61 {7, &AOC_U::PrepareAddOnContent, "PrepareAddOnContent"},
62 {8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"}, 62 {8, &AOC_U::GetAddOnContentListChangedEvent, "GetAddOnContentListChangedEvent"},
63 {9, nullptr, "GetAddOnContentLostErrorCode"},
63 {100, nullptr, "CreateEcPurchasedEventManager"}, 64 {100, nullptr, "CreateEcPurchasedEventManager"},
64 {101, nullptr, "CreatePermanentEcPurchasedEventManager"}, 65 {101, nullptr, "CreatePermanentEcPurchasedEventManager"},
65 }; 66 };
diff --git a/src/core/hle/service/bcat/bcat.cpp b/src/core/hle/service/bcat/bcat.cpp
index 8bb2528c9..b31766212 100644
--- a/src/core/hle/service/bcat/bcat.cpp
+++ b/src/core/hle/service/bcat/bcat.cpp
@@ -14,6 +14,8 @@ BCAT::BCAT(Core::System& system, std::shared_ptr<Module> module,
14 {0, &BCAT::CreateBcatService, "CreateBcatService"}, 14 {0, &BCAT::CreateBcatService, "CreateBcatService"},
15 {1, &BCAT::CreateDeliveryCacheStorageService, "CreateDeliveryCacheStorageService"}, 15 {1, &BCAT::CreateDeliveryCacheStorageService, "CreateDeliveryCacheStorageService"},
16 {2, &BCAT::CreateDeliveryCacheStorageServiceWithApplicationId, "CreateDeliveryCacheStorageServiceWithApplicationId"}, 16 {2, &BCAT::CreateDeliveryCacheStorageServiceWithApplicationId, "CreateDeliveryCacheStorageServiceWithApplicationId"},
17 {3, nullptr, "CreateDeliveryCacheProgressService"},
18 {4, nullptr, "CreateDeliveryCacheProgressServiceWithApplicationId"},
17 }; 19 };
18 // clang-format on 20 // clang-format on
19 RegisterHandlers(functions); 21 RegisterHandlers(functions);
diff --git a/src/core/hle/service/bcat/module.cpp b/src/core/hle/service/bcat/module.cpp
index 34aba7a27..603b64d4f 100644
--- a/src/core/hle/service/bcat/module.cpp
+++ b/src/core/hle/service/bcat/module.cpp
@@ -143,10 +143,13 @@ public:
143 {20401, nullptr, "UnregisterSystemApplicationDeliveryTask"}, 143 {20401, nullptr, "UnregisterSystemApplicationDeliveryTask"},
144 {20410, nullptr, "SetSystemApplicationDeliveryTaskTimer"}, 144 {20410, nullptr, "SetSystemApplicationDeliveryTaskTimer"},
145 {30100, &IBcatService::SetPassphrase, "SetPassphrase"}, 145 {30100, &IBcatService::SetPassphrase, "SetPassphrase"},
146 {30101, nullptr, "Unknown"},
147 {30102, nullptr, "Unknown2"},
146 {30200, nullptr, "RegisterBackgroundDeliveryTask"}, 148 {30200, nullptr, "RegisterBackgroundDeliveryTask"},
147 {30201, nullptr, "UnregisterBackgroundDeliveryTask"}, 149 {30201, nullptr, "UnregisterBackgroundDeliveryTask"},
148 {30202, nullptr, "BlockDeliveryTask"}, 150 {30202, nullptr, "BlockDeliveryTask"},
149 {30203, nullptr, "UnblockDeliveryTask"}, 151 {30203, nullptr, "UnblockDeliveryTask"},
152 {30210, nullptr, "SetDeliveryTaskTimer"},
150 {30300, nullptr, "RegisterSystemApplicationDeliveryTasks"}, 153 {30300, nullptr, "RegisterSystemApplicationDeliveryTasks"},
151 {90100, nullptr, "EnumerateBackgroundDeliveryTask"}, 154 {90100, nullptr, "EnumerateBackgroundDeliveryTask"},
152 {90200, nullptr, "GetDeliveryList"}, 155 {90200, nullptr, "GetDeliveryList"},
diff --git a/src/core/hle/service/bpc/bpc.cpp b/src/core/hle/service/bpc/bpc.cpp
index 1c1ecdb60..fac6b2f9c 100644
--- a/src/core/hle/service/bpc/bpc.cpp
+++ b/src/core/hle/service/bpc/bpc.cpp
@@ -23,9 +23,14 @@ public:
23 {5, nullptr, "GetBoardPowerControlEvent"}, 23 {5, nullptr, "GetBoardPowerControlEvent"},
24 {6, nullptr, "GetSleepButtonState"}, 24 {6, nullptr, "GetSleepButtonState"},
25 {7, nullptr, "GetPowerEvent"}, 25 {7, nullptr, "GetPowerEvent"},
26 {8, nullptr, "Unknown1"}, 26 {8, nullptr, "CreateWakeupTimer"},
27 {9, nullptr, "Unknown2"}, 27 {9, nullptr, "CancelWakeupTimer"},
28 {10, nullptr, "Unknown3"}, 28 {10, nullptr, "EnableWakeupTimerOnDevice"},
29 {11, nullptr, "CreateWakeupTimerEx"},
30 {12, nullptr, "GetLastEnabledWakeupTimerType"},
31 {13, nullptr, "CleanAllWakeupTimers"},
32 {14, nullptr, "Unknown"},
33 {15, nullptr, "Unknown2"},
29 }; 34 };
30 // clang-format on 35 // clang-format on
31 36
@@ -38,10 +43,11 @@ public:
38 explicit BPC_R() : ServiceFramework{"bpc:r"} { 43 explicit BPC_R() : ServiceFramework{"bpc:r"} {
39 // clang-format off 44 // clang-format off
40 static const FunctionInfo functions[] = { 45 static const FunctionInfo functions[] = {
41 {0, nullptr, "GetExternalRtcValue"}, 46 {0, nullptr, "GetRtcTime"},
42 {1, nullptr, "SetExternalRtcValue"}, 47 {1, nullptr, "SetRtcTime"},
43 {2, nullptr, "ReadExternalRtcResetFlag"}, 48 {2, nullptr, "GetRtcResetDetected"},
44 {3, nullptr, "ClearExternalRtcResetFlag"}, 49 {3, nullptr, "ClearRtcResetDetected"},
50 {4, nullptr, "SetUpRtcResetOnShutdown"},
45 }; 51 };
46 // clang-format on 52 // clang-format on
47 53
diff --git a/src/core/hle/service/btdrv/btdrv.cpp b/src/core/hle/service/btdrv/btdrv.cpp
index 40a06c9fd..f311afa2f 100644
--- a/src/core/hle/service/btdrv/btdrv.cpp
+++ b/src/core/hle/service/btdrv/btdrv.cpp
@@ -58,102 +58,103 @@ public:
58 {1, nullptr, "InitializeBluetooth"}, 58 {1, nullptr, "InitializeBluetooth"},
59 {2, nullptr, "EnableBluetooth"}, 59 {2, nullptr, "EnableBluetooth"},
60 {3, nullptr, "DisableBluetooth"}, 60 {3, nullptr, "DisableBluetooth"},
61 {4, nullptr, "CleanupBluetooth"}, 61 {4, nullptr, "FinalizeBluetooth"},
62 {5, nullptr, "GetAdapterProperties"}, 62 {5, nullptr, "GetAdapterProperties"},
63 {6, nullptr, "GetAdapterProperty"}, 63 {6, nullptr, "GetAdapterProperty"},
64 {7, nullptr, "SetAdapterProperty"}, 64 {7, nullptr, "SetAdapterProperty"},
65 {8, nullptr, "StartDiscovery"}, 65 {8, nullptr, "StartInquiry"},
66 {9, nullptr, "CancelDiscovery"}, 66 {9, nullptr, "StopInquiry"},
67 {10, nullptr, "CreateBond"}, 67 {10, nullptr, "CreateBond"},
68 {11, nullptr, "RemoveBond"}, 68 {11, nullptr, "RemoveBond"},
69 {12, nullptr, "CancelBond"}, 69 {12, nullptr, "CancelBond"},
70 {13, nullptr, "PinReply"}, 70 {13, nullptr, "RespondToPinRequest"},
71 {14, nullptr, "SspReply"}, 71 {14, nullptr, "RespondToSspRequest"},
72 {15, nullptr, "GetEventInfo"}, 72 {15, nullptr, "GetEventInfo"},
73 {16, nullptr, "InitializeHid"}, 73 {16, nullptr, "InitializeHid"},
74 {17, nullptr, "HidConnect"}, 74 {17, nullptr, "OpenHidConnection"},
75 {18, nullptr, "HidDisconnect"}, 75 {18, nullptr, "CloseHidConnection"},
76 {19, nullptr, "HidSendData"}, 76 {19, nullptr, "WriteHidData"},
77 {20, nullptr, "HidSendData2"}, 77 {20, nullptr, "WriteHidData2"},
78 {21, nullptr, "HidSetReport"}, 78 {21, nullptr, "SetHidReport"},
79 {22, nullptr, "HidGetReport"}, 79 {22, nullptr, "GetHidReport"},
80 {23, nullptr, "HidWakeController"}, 80 {23, nullptr, "TriggerConnection"},
81 {24, nullptr, "HidAddPairedDevice"}, 81 {24, nullptr, "AddPairedDeviceInfo"},
82 {25, nullptr, "HidGetPairedDevice"}, 82 {25, nullptr, "GetPairedDeviceInfo"},
83 {26, nullptr, "CleanupHid"}, 83 {26, nullptr, "FinalizeHid"},
84 {27, nullptr, "HidGetEventInfo"}, 84 {27, nullptr, "GetHidEventInfo"},
85 {28, nullptr, "ExtSetTsi"}, 85 {28, nullptr, "SetTsi"},
86 {29, nullptr, "ExtSetBurstMode"}, 86 {29, nullptr, "EnableBurstMode"},
87 {30, nullptr, "ExtSetZeroRetran"}, 87 {30, nullptr, "SetZeroRetransmission"},
88 {31, nullptr, "ExtSetMcMode"}, 88 {31, nullptr, "EnableMcMode"},
89 {32, nullptr, "ExtStartLlrMode"}, 89 {32, nullptr, "EnableLlrScan"},
90 {33, nullptr, "ExtExitLlrMode"}, 90 {33, nullptr, "DisableLlrScan"},
91 {34, nullptr, "ExtSetRadio"}, 91 {34, nullptr, "EnableRadio"},
92 {35, nullptr, "ExtSetVisibility"}, 92 {35, nullptr, "SetVisibility"},
93 {36, nullptr, "ExtSetTbfcScan"}, 93 {36, nullptr, "EnableTbfcScan"},
94 {37, nullptr, "RegisterHidReportEvent"}, 94 {37, nullptr, "RegisterHidReportEvent"},
95 {38, nullptr, "HidGetReportEventInfo"}, 95 {38, nullptr, "GetHidReportEventInfo"},
96 {39, nullptr, "GetLatestPlr"}, 96 {39, nullptr, "GetLatestPlr"},
97 {40, nullptr, "ExtGetPendingConnections"}, 97 {40, nullptr, "GetPendingConnections"},
98 {41, nullptr, "GetChannelMap"}, 98 {41, nullptr, "GetChannelMap"},
99 {42, nullptr, "EnableBluetoothBoostSetting"}, 99 {42, nullptr, "EnableTxPowerBoostSetting"},
100 {43, nullptr, "IsBluetoothBoostSettingEnabled"}, 100 {43, nullptr, "IsTxPowerBoostSettingEnabled"},
101 {44, nullptr, "EnableBluetoothAfhSetting"}, 101 {44, nullptr, "EnableAfhSetting"},
102 {45, nullptr, "IsBluetoothAfhSettingEnabled"}, 102 {45, nullptr, "IsAfhSettingEnabled"},
103 {46, nullptr, "InitializeBluetoothLe"}, 103 {46, nullptr, "InitializeBle"},
104 {47, nullptr, "EnableBluetoothLe"}, 104 {47, nullptr, "EnableBle"},
105 {48, nullptr, "DisableBluetoothLe"}, 105 {48, nullptr, "DisableBle"},
106 {49, nullptr, "CleanupBluetoothLe"}, 106 {49, nullptr, "FinalizeBle"},
107 {50, nullptr, "SetLeVisibility"}, 107 {50, nullptr, "SetBleVisibility"},
108 {51, nullptr, "SetLeConnectionParameter"}, 108 {51, nullptr, "SetBleConnectionParameter"},
109 {52, nullptr, "SetLeDefaultConnectionParameter"}, 109 {52, nullptr, "SetBleDefaultConnectionParameter"},
110 {53, nullptr, "SetLeAdvertiseData"}, 110 {53, nullptr, "SetBleAdvertiseData"},
111 {54, nullptr, "SetLeAdvertiseParameter"}, 111 {54, nullptr, "SetBleAdvertiseParameter"},
112 {55, nullptr, "StartLeScan"}, 112 {55, nullptr, "StartBleScan"},
113 {56, nullptr, "StopLeScan"}, 113 {56, nullptr, "StopBleScan"},
114 {57, nullptr, "AddLeScanFilterCondition"}, 114 {57, nullptr, "AddBleScanFilterCondition"},
115 {58, nullptr, "DeleteLeScanFilterCondition"}, 115 {58, nullptr, "DeleteBleScanFilterCondition"},
116 {59, nullptr, "DeleteLeScanFilter"}, 116 {59, nullptr, "DeleteBleScanFilter"},
117 {60, nullptr, "ClearLeScanFilters"}, 117 {60, nullptr, "ClearBleScanFilters"},
118 {61, nullptr, "EnableLeScanFilter"}, 118 {61, nullptr, "EnableBleScanFilter"},
119 {62, nullptr, "RegisterLeClient"}, 119 {62, nullptr, "RegisterGattClient"},
120 {63, nullptr, "UnregisterLeClient"}, 120 {63, nullptr, "UnregisterGattClient"},
121 {64, nullptr, "UnregisterLeClientAll"}, 121 {64, nullptr, "UnregisterAllGattClients"},
122 {65, nullptr, "LeClientConnect"}, 122 {65, nullptr, "ConnectGattServer"},
123 {66, nullptr, "LeClientCancelConnection"}, 123 {66, nullptr, "CancelConnectGattServer"},
124 {67, nullptr, "LeClientDisconnect"}, 124 {67, nullptr, "DisconnectGattServer"},
125 {68, nullptr, "LeClientGetAttributes"}, 125 {68, nullptr, "GetGattAttribute"},
126 {69, nullptr, "LeClientDiscoverService"}, 126 {69, nullptr, "GetGattService"},
127 {70, nullptr, "LeClientConfigureMtu"}, 127 {70, nullptr, "ConfigureAttMtu"},
128 {71, nullptr, "RegisterLeServer"}, 128 {71, nullptr, "RegisterGattServer"},
129 {72, nullptr, "UnregisterLeServer"}, 129 {72, nullptr, "UnregisterGattServer"},
130 {73, nullptr, "LeServerConnect"}, 130 {73, nullptr, "ConnectGattClient"},
131 {74, nullptr, "LeServerDisconnect"}, 131 {74, nullptr, "DisconnectGattClient"},
132 {75, nullptr, "CreateLeService"}, 132 {75, nullptr, "AddGattService"},
133 {76, nullptr, "StartLeService"}, 133 {76, nullptr, "EnableGattService"},
134 {77, nullptr, "AddLeCharacteristic"}, 134 {77, nullptr, "AddGattCharacteristic"},
135 {78, nullptr, "AddLeDescriptor"}, 135 {78, nullptr, "AddGattDescriptor"},
136 {79, nullptr, "GetLeCoreEventInfo"}, 136 {79, nullptr, "GetBleManagedEventInfo"},
137 {80, nullptr, "LeGetFirstCharacteristic"}, 137 {80, nullptr, "GetGattFirstCharacteristic"},
138 {81, nullptr, "LeGetNextCharacteristic"}, 138 {81, nullptr, "GetGattNextCharacteristic"},
139 {82, nullptr, "LeGetFirstDescriptor"}, 139 {82, nullptr, "GetGattFirstDescriptor"},
140 {83, nullptr, "LeGetNextDescriptor"}, 140 {83, nullptr, "GetGattNextDescriptor"},
141 {84, nullptr, "RegisterLeCoreDataPath"}, 141 {84, nullptr, "RegisterGattManagedDataPath"},
142 {85, nullptr, "UnregisterLeCoreDataPath"}, 142 {85, nullptr, "UnregisterGattManagedDataPath"},
143 {86, nullptr, "RegisterLeHidDataPath"}, 143 {86, nullptr, "RegisterGattHidDataPath"},
144 {87, nullptr, "UnregisterLeHidDataPath"}, 144 {87, nullptr, "UnregisterGattHidDataPath"},
145 {88, nullptr, "RegisterLeDataPath"}, 145 {88, nullptr, "RegisterGattDataPath"},
146 {89, nullptr, "UnregisterLeDataPath"}, 146 {89, nullptr, "UnregisterGattDataPath"},
147 {90, nullptr, "LeClientReadCharacteristic"}, 147 {90, nullptr, "ReadGattCharacteristic"},
148 {91, nullptr, "LeClientReadDescriptor"}, 148 {91, nullptr, "ReadGattDescriptor"},
149 {92, nullptr, "LeClientWriteCharacteristic"}, 149 {92, nullptr, "WriteGattCharacteristic"},
150 {93, nullptr, "LeClientWriteDescriptor"}, 150 {93, nullptr, "WriteGattDescriptor"},
151 {94, nullptr, "LeClientRegisterNotification"}, 151 {94, nullptr, "RegisterGattNotification"},
152 {95, nullptr, "LeClientDeregisterNotification"}, 152 {95, nullptr, "UnregisterGattNotification"},
153 {96, nullptr, "GetLeHidEventInfo"}, 153 {96, nullptr, "GetLeHidEventInfo"},
154 {97, nullptr, "RegisterBleHidEvent"}, 154 {97, nullptr, "RegisterBleHidEvent"},
155 {98, nullptr, "SetLeScanParameter"}, 155 {98, nullptr, "SetBleScanParameter"},
156 {256, nullptr, "GetIsManufacturingMode"}, 156 {99, nullptr, "MoveToSecondaryPiconet"},
157 {256, nullptr, "IsManufacturingMode"},
157 {257, nullptr, "EmulateBluetoothCrash"}, 158 {257, nullptr, "EmulateBluetoothCrash"},
158 {258, nullptr, "GetBleChannelMap"}, 159 {258, nullptr, "GetBleChannelMap"},
159 }; 160 };
diff --git a/src/core/hle/service/btm/btm.cpp b/src/core/hle/service/btm/btm.cpp
index 251b3c9df..0d251c6d0 100644
--- a/src/core/hle/service/btm/btm.cpp
+++ b/src/core/hle/service/btm/btm.cpp
@@ -132,66 +132,71 @@ public:
132 explicit BTM() : ServiceFramework{"btm"} { 132 explicit BTM() : ServiceFramework{"btm"} {
133 // clang-format off 133 // clang-format off
134 static const FunctionInfo functions[] = { 134 static const FunctionInfo functions[] = {
135 {0, nullptr, "Unknown1"}, 135 {0, nullptr, "GetState"},
136 {1, nullptr, "Unknown2"}, 136 {1, nullptr, "GetHostDeviceProperty"},
137 {2, nullptr, "RegisterSystemEventForConnectedDeviceCondition"}, 137 {2, nullptr, "AcquireDeviceConditionEvent"},
138 {3, nullptr, "Unknown3"}, 138 {3, nullptr, "GetDeviceCondition"},
139 {4, nullptr, "Unknown4"}, 139 {4, nullptr, "SetBurstMode"},
140 {5, nullptr, "Unknown5"}, 140 {5, nullptr, "SetSlotMode"},
141 {6, nullptr, "Unknown6"}, 141 {6, nullptr, "SetBluetoothMode"},
142 {7, nullptr, "Unknown7"}, 142 {7, nullptr, "SetWlanMode"},
143 {8, nullptr, "RegisterSystemEventForRegisteredDeviceInfo"}, 143 {8, nullptr, "AcquireDeviceInfoEvent"},
144 {9, nullptr, "Unknown8"}, 144 {9, nullptr, "GetDeviceInfo"},
145 {10, nullptr, "Unknown9"}, 145 {10, nullptr, "AddDeviceInfo"},
146 {11, nullptr, "Unknown10"}, 146 {11, nullptr, "RemoveDeviceInfo"},
147 {12, nullptr, "Unknown11"}, 147 {12, nullptr, "IncreaseDeviceInfoOrder"},
148 {13, nullptr, "Unknown12"}, 148 {13, nullptr, "LlrNotify"},
149 {14, nullptr, "EnableRadio"}, 149 {14, nullptr, "EnableRadio"},
150 {15, nullptr, "DisableRadio"}, 150 {15, nullptr, "DisableRadio"},
151 {16, nullptr, "Unknown13"}, 151 {16, nullptr, "HidDisconnect"},
152 {17, nullptr, "Unknown14"}, 152 {17, nullptr, "HidSetRetransmissionMode"},
153 {18, nullptr, "Unknown15"}, 153 {18, nullptr, "AcquireAwakeReqEvent"},
154 {19, nullptr, "Unknown16"}, 154 {19, nullptr, "AcquireLlrStateEvent"},
155 {20, nullptr, "Unknown17"}, 155 {20, nullptr, "IsLlrStarted"},
156 {21, nullptr, "Unknown18"}, 156 {21, nullptr, "EnableSlotSaving"},
157 {22, nullptr, "Unknown19"}, 157 {22, nullptr, "ProtectDeviceInfo"},
158 {23, nullptr, "Unknown20"}, 158 {23, nullptr, "AcquireBleScanEvent"},
159 {24, nullptr, "Unknown21"}, 159 {24, nullptr, "GetBleScanParameterGeneral"},
160 {25, nullptr, "Unknown22"}, 160 {25, nullptr, "GetBleScanParameterSmartDevice"},
161 {26, nullptr, "Unknown23"}, 161 {26, nullptr, "StartBleScanForGeneral"},
162 {27, nullptr, "Unknown24"}, 162 {27, nullptr, "StopBleScanForGeneral"},
163 {28, nullptr, "Unknown25"}, 163 {28, nullptr, "GetBleScanResultsForGeneral"},
164 {29, nullptr, "Unknown26"}, 164 {29, nullptr, "StartBleScanForPairedDevice"},
165 {30, nullptr, "Unknown27"}, 165 {30, nullptr, "StopBleScanForPairedDevice"},
166 {31, nullptr, "Unknown28"}, 166 {31, nullptr, "StartBleScanForSmartDevice"},
167 {32, nullptr, "Unknown29"}, 167 {32, nullptr, "StopBleScanForSmartDevice"},
168 {33, nullptr, "Unknown30"}, 168 {33, nullptr, "GetBleScanResultsForSmartDevice"},
169 {34, nullptr, "Unknown31"}, 169 {34, nullptr, "AcquireBleConnectionEvent"},
170 {35, nullptr, "Unknown32"}, 170 {35, nullptr, "BleConnect"},
171 {36, nullptr, "Unknown33"}, 171 {36, nullptr, "BleOverrideConnection"},
172 {37, nullptr, "Unknown34"}, 172 {37, nullptr, "BleDisconnect"},
173 {38, nullptr, "Unknown35"}, 173 {38, nullptr, "BleGetConnectionState"},
174 {39, nullptr, "Unknown36"}, 174 {39, nullptr, "BleGetGattClientConditionList"},
175 {40, nullptr, "Unknown37"}, 175 {40, nullptr, "AcquireBlePairingEvent"},
176 {41, nullptr, "Unknown38"}, 176 {41, nullptr, "BlePairDevice"},
177 {42, nullptr, "Unknown39"}, 177 {42, nullptr, "BleUnpairDeviceOnBoth"},
178 {43, nullptr, "Unknown40"}, 178 {43, nullptr, "BleUnpairDevice"},
179 {44, nullptr, "Unknown41"}, 179 {44, nullptr, "BleGetPairedAddresses"},
180 {45, nullptr, "Unknown42"}, 180 {45, nullptr, "AcquireBleServiceDiscoveryEvent"},
181 {46, nullptr, "Unknown43"}, 181 {46, nullptr, "GetGattServices"},
182 {47, nullptr, "Unknown44"}, 182 {47, nullptr, "GetGattService"},
183 {48, nullptr, "Unknown45"}, 183 {48, nullptr, "GetGattIncludedServices"},
184 {49, nullptr, "Unknown46"}, 184 {49, nullptr, "GetBelongingService"},
185 {50, nullptr, "Unknown47"}, 185 {50, nullptr, "GetGattCharacteristics"},
186 {51, nullptr, "Unknown48"}, 186 {51, nullptr, "GetGattDescriptors"},
187 {52, nullptr, "Unknown49"}, 187 {52, nullptr, "AcquireBleMtuConfigEvent"},
188 {53, nullptr, "Unknown50"}, 188 {53, nullptr, "ConfigureBleMtu"},
189 {54, nullptr, "Unknown51"}, 189 {54, nullptr, "GetBleMtu"},
190 {55, nullptr, "Unknown52"}, 190 {55, nullptr, "RegisterBleGattDataPath"},
191 {56, nullptr, "Unknown53"}, 191 {56, nullptr, "UnregisterBleGattDataPath"},
192 {57, nullptr, "Unknown54"}, 192 {57, nullptr, "RegisterAppletResourceUserId"},
193 {58, nullptr, "Unknown55"}, 193 {58, nullptr, "UnregisterAppletResourceUserId"},
194 {59, nullptr, "Unknown56"}, 194 {59, nullptr, "SetAppletResourceUserId"},
195 {60, nullptr, "Unknown60"},
196 {61, nullptr, "Unknown61"},
197 {62, nullptr, "Unknown62"},
198 {63, nullptr, "Unknown63"},
199 {64, nullptr, "Unknown64"},
195 }; 200 };
196 // clang-format on 201 // clang-format on
197 202
@@ -204,19 +209,19 @@ public:
204 explicit BTM_DBG() : ServiceFramework{"btm:dbg"} { 209 explicit BTM_DBG() : ServiceFramework{"btm:dbg"} {
205 // clang-format off 210 // clang-format off
206 static const FunctionInfo functions[] = { 211 static const FunctionInfo functions[] = {
207 {0, nullptr, "RegisterSystemEventForDiscovery"}, 212 {0, nullptr, "AcquireDiscoveryEvent"},
208 {1, nullptr, "Unknown1"}, 213 {1, nullptr, "StartDiscovery"},
209 {2, nullptr, "Unknown2"}, 214 {2, nullptr, "CancelDiscovery"},
210 {3, nullptr, "Unknown3"}, 215 {3, nullptr, "GetDeviceProperty"},
211 {4, nullptr, "Unknown4"}, 216 {4, nullptr, "CreateBond"},
212 {5, nullptr, "Unknown5"}, 217 {5, nullptr, "CancelBond"},
213 {6, nullptr, "Unknown6"}, 218 {6, nullptr, "SetTsiMode"},
214 {7, nullptr, "Unknown7"}, 219 {7, nullptr, "GeneralTest"},
215 {8, nullptr, "Unknown8"}, 220 {8, nullptr, "HidConnect"},
216 {9, nullptr, "Unknown9"}, 221 {9, nullptr, "GeneralGet"},
217 {10, nullptr, "Unknown10"}, 222 {10, nullptr, "GetGattClientDisconnectionReason"},
218 {11, nullptr, "Unknown11"}, 223 {11, nullptr, "GetBleConnectionParameter"},
219 {12, nullptr, "Unknown11"}, 224 {12, nullptr, "GetBleConnectionParameterRequest"},
220 }; 225 };
221 // clang-format on 226 // clang-format on
222 227
diff --git a/src/core/hle/service/caps/caps.cpp b/src/core/hle/service/caps/caps.cpp
index 26c8a7081..ba5749b84 100644
--- a/src/core/hle/service/caps/caps.cpp
+++ b/src/core/hle/service/caps/caps.cpp
@@ -1,4 +1,4 @@
1// Copyright 2018 yuzu emulator team 1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps.h b/src/core/hle/service/caps/caps.h
index fc70a4c27..b8c67b6e2 100644
--- a/src/core/hle/service/caps/caps.h
+++ b/src/core/hle/service/caps/caps.h
@@ -1,4 +1,4 @@
1// Copyright 2018 yuzu emulator team 1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
@@ -12,73 +12,79 @@ class ServiceManager;
12 12
13namespace Service::Capture { 13namespace Service::Capture {
14 14
15enum AlbumImageOrientation { 15enum class AlbumImageOrientation {
16 Orientation0 = 0, 16 Orientation0 = 0,
17 Orientation1 = 1, 17 Orientation1 = 1,
18 Orientation2 = 2, 18 Orientation2 = 2,
19 Orientation3 = 3, 19 Orientation3 = 3,
20}; 20};
21 21
22enum AlbumReportOption { 22enum class AlbumReportOption {
23 Disable = 0, 23 Disable = 0,
24 Enable = 1, 24 Enable = 1,
25}; 25};
26 26
27enum ContentType : u8 { 27enum class ContentType : u8 {
28 Screenshot = 0, 28 Screenshot = 0,
29 Movie = 1, 29 Movie = 1,
30 ExtraMovie = 3, 30 ExtraMovie = 3,
31}; 31};
32 32
33enum AlbumStorage : u8 { 33enum class AlbumStorage : u8 {
34 NAND = 0, 34 NAND = 0,
35 SD = 1, 35 SD = 1,
36}; 36};
37 37
38struct AlbumFileDateTime { 38struct AlbumFileDateTime {
39 u16 year; 39 s16 year{};
40 u8 month; 40 s8 month{};
41 u8 day; 41 s8 day{};
42 u8 hour; 42 s8 hour{};
43 u8 minute; 43 s8 minute{};
44 u8 second; 44 s8 second{};
45 u8 uid; 45 s8 uid{};
46}; 46};
47static_assert(sizeof(AlbumFileDateTime) == 0x8, "AlbumFileDateTime has incorrect size.");
47 48
48struct AlbumEntry { 49struct AlbumEntry {
49 u64 size; 50 u64 size{};
50 u64 application_id; 51 u64 application_id{};
51 AlbumFileDateTime datetime; 52 AlbumFileDateTime datetime{};
52 AlbumStorage storage; 53 AlbumStorage storage{};
53 ContentType content; 54 ContentType content{};
54 u8 padding[6]; 55 INSERT_PADDING_BYTES(6);
55}; 56};
57static_assert(sizeof(AlbumEntry) == 0x20, "AlbumEntry has incorrect size.");
56 58
57struct AlbumFileEntry { 59struct AlbumFileEntry {
58 u64 size; 60 u64 size{}; // Size of the entry
59 u64 hash; 61 u64 hash{}; // AES256 with hardcoded key over AlbumEntry
60 AlbumFileDateTime datetime; 62 AlbumFileDateTime datetime{};
61 AlbumStorage storage; 63 AlbumStorage storage{};
62 ContentType content; 64 ContentType content{};
63 u8 padding[5]; 65 INSERT_PADDING_BYTES(5);
64 u8 unknown; 66 u8 unknown{1}; // Set to 1 on official SW
65}; 67};
68static_assert(sizeof(AlbumFileEntry) == 0x20, "AlbumFileEntry has incorrect size.");
66 69
67struct ApplicationAlbumEntry { 70struct ApplicationAlbumEntry {
68 u64 size; 71 u64 size{}; // Size of the entry
69 u64 hash; 72 u64 hash{}; // AES256 with hardcoded key over AlbumEntry
70 AlbumFileDateTime datetime; 73 AlbumFileDateTime datetime{};
71 AlbumStorage storage; 74 AlbumStorage storage{};
72 ContentType content; 75 ContentType content{};
73 u8 padding[5]; 76 INSERT_PADDING_BYTES(5);
74 u8 unknown; 77 u8 unknown{1}; // Set to 1 on official SW
75}; 78};
79static_assert(sizeof(ApplicationAlbumEntry) == 0x20, "ApplicationAlbumEntry has incorrect size.");
76 80
77struct ApplicationAlbumFileEntry { 81struct ApplicationAlbumFileEntry {
78 ApplicationAlbumEntry entry; 82 ApplicationAlbumEntry entry{};
79 AlbumFileDateTime datetime; 83 AlbumFileDateTime datetime{};
80 u64 unknown; 84 u64 unknown{};
81}; 85};
86static_assert(sizeof(ApplicationAlbumFileEntry) == 0x30,
87 "ApplicationAlbumFileEntry has incorrect size.");
82 88
83/// Registers all Capture services with the specified service manager. 89/// Registers all Capture services with the specified service manager.
84void InstallInterfaces(SM::ServiceManager& sm); 90void InstallInterfaces(SM::ServiceManager& sm);
diff --git a/src/core/hle/service/caps/caps_a.cpp b/src/core/hle/service/caps/caps_a.cpp
index 88a3fdc05..a0a3b2ae3 100644
--- a/src/core/hle/service/caps/caps_a.cpp
+++ b/src/core/hle/service/caps/caps_a.cpp
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_a.h b/src/core/hle/service/caps/caps_a.h
index 8de832491..cb93aad5b 100644
--- a/src/core/hle/service/caps/caps_a.h
+++ b/src/core/hle/service/caps/caps_a.h
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_c.cpp b/src/core/hle/service/caps/caps_c.cpp
index ea6452ffa..ab17a187e 100644
--- a/src/core/hle/service/caps/caps_c.cpp
+++ b/src/core/hle/service/caps/caps_c.cpp
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_c.h b/src/core/hle/service/caps/caps_c.h
index d07cdb441..a9d028689 100644
--- a/src/core/hle/service/caps/caps_c.h
+++ b/src/core/hle/service/caps/caps_c.h
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_sc.cpp b/src/core/hle/service/caps/caps_sc.cpp
index d01a8a58e..822ee96c8 100644
--- a/src/core/hle/service/caps/caps_sc.cpp
+++ b/src/core/hle/service/caps/caps_sc.cpp
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_sc.h b/src/core/hle/service/caps/caps_sc.h
index 9ba372f7a..ac3e929ca 100644
--- a/src/core/hle/service/caps/caps_sc.h
+++ b/src/core/hle/service/caps/caps_sc.h
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_ss.cpp b/src/core/hle/service/caps/caps_ss.cpp
index eaa3a7494..24dc716e7 100644
--- a/src/core/hle/service/caps/caps_ss.cpp
+++ b/src/core/hle/service/caps/caps_ss.cpp
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_ss.h b/src/core/hle/service/caps/caps_ss.h
index e258a6925..450686e4f 100644
--- a/src/core/hle/service/caps/caps_ss.h
+++ b/src/core/hle/service/caps/caps_ss.h
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_su.cpp b/src/core/hle/service/caps/caps_su.cpp
index e8b0698e8..fffb2ecf9 100644
--- a/src/core/hle/service/caps/caps_su.cpp
+++ b/src/core/hle/service/caps/caps_su.cpp
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_su.h b/src/core/hle/service/caps/caps_su.h
index c494d7c84..62c9603a9 100644
--- a/src/core/hle/service/caps/caps_su.h
+++ b/src/core/hle/service/caps/caps_su.h
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/caps/caps_u.cpp b/src/core/hle/service/caps/caps_u.cpp
index 78bab6ed8..f36d8de2d 100644
--- a/src/core/hle/service/caps/caps_u.cpp
+++ b/src/core/hle/service/caps/caps_u.cpp
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
@@ -58,19 +58,25 @@ void CAPS_U::GetAlbumContentsFileListForApplication(Kernel::HLERequestContext& c
58 // u8 ContentType, two s64s, and an u64 AppletResourceUserId. Returns an output u64 for total 58 // u8 ContentType, two s64s, and an u64 AppletResourceUserId. Returns an output u64 for total
59 // output entries (which is copied to a s32 by official SW). 59 // output entries (which is copied to a s32 by official SW).
60 IPC::RequestParser rp{ctx}; 60 IPC::RequestParser rp{ctx};
61 [[maybe_unused]] const auto application_album_file_entries = rp.PopRaw<std::array<u8, 0x30>>(); 61 const auto pid{rp.Pop<s32>()};
62 const auto pid = rp.Pop<s32>(); 62 const auto content_type{rp.PopEnum<ContentType>()};
63 const auto content_type = rp.PopRaw<ContentType>(); 63 const auto start_posix_time{rp.Pop<s64>()};
64 [[maybe_unused]] const auto start_datetime = rp.PopRaw<AlbumFileDateTime>(); 64 const auto end_posix_time{rp.Pop<s64>()};
65 [[maybe_unused]] const auto end_datetime = rp.PopRaw<AlbumFileDateTime>(); 65 const auto applet_resource_user_id{rp.Pop<u64>()};
66 const auto applet_resource_user_id = rp.Pop<u64>(); 66
67 // TODO: Update this when we implement the album.
68 // Currently we do not have a method of accessing album entries, set this to 0 for now.
69 constexpr s32 total_entries{0};
70
67 LOG_WARNING(Service_Capture, 71 LOG_WARNING(Service_Capture,
68 "(STUBBED) called. pid={}, content_type={}, applet_resource_user_id={}", pid, 72 "(STUBBED) called. pid={}, content_type={}, start_posix_time={}, "
69 content_type, applet_resource_user_id); 73 "end_posix_time={}, applet_resource_user_id={}, total_entries={}",
74 pid, content_type, start_posix_time, end_posix_time, applet_resource_user_id,
75 total_entries);
70 76
71 IPC::ResponseBuilder rb{ctx, 3}; 77 IPC::ResponseBuilder rb{ctx, 3};
72 rb.Push(RESULT_SUCCESS); 78 rb.Push(RESULT_SUCCESS);
73 rb.Push<s32>(0); 79 rb.Push(total_entries);
74} 80}
75 81
76} // namespace Service::Capture 82} // namespace Service::Capture
diff --git a/src/core/hle/service/caps/caps_u.h b/src/core/hle/service/caps/caps_u.h
index e6e0716ff..689364de4 100644
--- a/src/core/hle/service/caps/caps_u.h
+++ b/src/core/hle/service/caps/caps_u.h
@@ -1,4 +1,4 @@
1// Copyright 2020 yuzu emulator team 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
diff --git a/src/core/hle/service/es/es.cpp b/src/core/hle/service/es/es.cpp
index f8e9df4b1..9365f27e1 100644
--- a/src/core/hle/service/es/es.cpp
+++ b/src/core/hle/service/es/es.cpp
@@ -27,8 +27,8 @@ public:
27 {8, &ETicket::GetTitleKey, "GetTitleKey"}, 27 {8, &ETicket::GetTitleKey, "GetTitleKey"},
28 {9, &ETicket::CountCommonTicket, "CountCommonTicket"}, 28 {9, &ETicket::CountCommonTicket, "CountCommonTicket"},
29 {10, &ETicket::CountPersonalizedTicket, "CountPersonalizedTicket"}, 29 {10, &ETicket::CountPersonalizedTicket, "CountPersonalizedTicket"},
30 {11, &ETicket::ListCommonTicket, "ListCommonTicket"}, 30 {11, &ETicket::ListCommonTicketRightsIds, "ListCommonTicketRightsIds"},
31 {12, &ETicket::ListPersonalizedTicket, "ListPersonalizedTicket"}, 31 {12, &ETicket::ListPersonalizedTicketRightsIds, "ListPersonalizedTicketRightsIds"},
32 {13, nullptr, "ListMissingPersonalizedTicket"}, 32 {13, nullptr, "ListMissingPersonalizedTicket"},
33 {14, &ETicket::GetCommonTicketSize, "GetCommonTicketSize"}, 33 {14, &ETicket::GetCommonTicketSize, "GetCommonTicketSize"},
34 {15, &ETicket::GetPersonalizedTicketSize, "GetPersonalizedTicketSize"}, 34 {15, &ETicket::GetPersonalizedTicketSize, "GetPersonalizedTicketSize"},
@@ -55,7 +55,46 @@ public:
55 {36, nullptr, "DeleteAllInactiveELicenseRequiredPersonalizedTicket"}, 55 {36, nullptr, "DeleteAllInactiveELicenseRequiredPersonalizedTicket"},
56 {37, nullptr, "OwnTicket2"}, 56 {37, nullptr, "OwnTicket2"},
57 {38, nullptr, "OwnTicket3"}, 57 {38, nullptr, "OwnTicket3"},
58 {501, nullptr, "Unknown501"},
59 {502, nullptr, "Unknown502"},
58 {503, nullptr, "GetTitleKey"}, 60 {503, nullptr, "GetTitleKey"},
61 {504, nullptr, "Unknown504"},
62 {508, nullptr, "Unknown508"},
63 {509, nullptr, "Unknown509"},
64 {510, nullptr, "Unknown510"},
65 {511, nullptr, "Unknown511"},
66 {1001, nullptr, "Unknown1001"},
67 {1002, nullptr, "Unknown1001"},
68 {1003, nullptr, "Unknown1003"},
69 {1004, nullptr, "Unknown1004"},
70 {1005, nullptr, "Unknown1005"},
71 {1006, nullptr, "Unknown1006"},
72 {1007, nullptr, "Unknown1007"},
73 {1009, nullptr, "Unknown1009"},
74 {1010, nullptr, "Unknown1010"},
75 {1011, nullptr, "Unknown1011"},
76 {1012, nullptr, "Unknown1012"},
77 {1013, nullptr, "Unknown1013"},
78 {1014, nullptr, "Unknown1014"},
79 {1015, nullptr, "Unknown1015"},
80 {1016, nullptr, "Unknown1016"},
81 {1017, nullptr, "Unknown1017"},
82 {1018, nullptr, "Unknown1018"},
83 {1019, nullptr, "Unknown1019"},
84 {1020, nullptr, "Unknown1020"},
85 {1021, nullptr, "Unknown1021"},
86 {1501, nullptr, "Unknown1501"},
87 {1502, nullptr, "Unknown1502"},
88 {1503, nullptr, "Unknown1503"},
89 {1504, nullptr, "Unknown1504"},
90 {1505, nullptr, "Unknown1505"},
91 {2000, nullptr, "Unknown2000"},
92 {2001, nullptr, "Unknown2001"},
93 {2100, nullptr, "Unknown2100"},
94 {2501, nullptr, "Unknown2501"},
95 {2502, nullptr, "Unknown2502"},
96 {3001, nullptr, "Unknown3001"},
97 {3002, nullptr, "Unknown3002"},
59 }; 98 };
60 // clang-format on 99 // clang-format on
61 RegisterHandlers(functions); 100 RegisterHandlers(functions);
@@ -147,7 +186,7 @@ private:
147 rb.Push<u32>(count); 186 rb.Push<u32>(count);
148 } 187 }
149 188
150 void ListCommonTicket(Kernel::HLERequestContext& ctx) { 189 void ListCommonTicketRightsIds(Kernel::HLERequestContext& ctx) {
151 u32 out_entries; 190 u32 out_entries;
152 if (keys.GetCommonTickets().empty()) 191 if (keys.GetCommonTickets().empty())
153 out_entries = 0; 192 out_entries = 0;
@@ -170,7 +209,7 @@ private:
170 rb.Push<u32>(out_entries); 209 rb.Push<u32>(out_entries);
171 } 210 }
172 211
173 void ListPersonalizedTicket(Kernel::HLERequestContext& ctx) { 212 void ListPersonalizedTicketRightsIds(Kernel::HLERequestContext& ctx) {
174 u32 out_entries; 213 u32 out_entries;
175 if (keys.GetPersonalizedTickets().empty()) 214 if (keys.GetPersonalizedTickets().empty())
176 out_entries = 0; 215 out_entries = 0;
diff --git a/src/core/hle/service/eupld/eupld.cpp b/src/core/hle/service/eupld/eupld.cpp
index 2df30acee..0d6d244f4 100644
--- a/src/core/hle/service/eupld/eupld.cpp
+++ b/src/core/hle/service/eupld/eupld.cpp
@@ -19,6 +19,7 @@ public:
19 {1, nullptr, "ImportCrt"}, 19 {1, nullptr, "ImportCrt"},
20 {2, nullptr, "ImportPki"}, 20 {2, nullptr, "ImportPki"},
21 {3, nullptr, "SetAutoUpload"}, 21 {3, nullptr, "SetAutoUpload"},
22 {4, nullptr, "GetAutoUpload"},
22 }; 23 };
23 // clang-format on 24 // clang-format on
24 25
diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp
index 68f259b70..b7adaffc7 100644
--- a/src/core/hle/service/friend/friend.cpp
+++ b/src/core/hle/service/friend/friend.cpp
@@ -25,9 +25,13 @@ public:
25 {10101, &IFriendService::GetFriendList, "GetFriendList"}, 25 {10101, &IFriendService::GetFriendList, "GetFriendList"},
26 {10102, nullptr, "UpdateFriendInfo"}, 26 {10102, nullptr, "UpdateFriendInfo"},
27 {10110, nullptr, "GetFriendProfileImage"}, 27 {10110, nullptr, "GetFriendProfileImage"},
28 {10120, nullptr, "Unknown10120"},
29 {10121, nullptr, "Unknown10121"},
28 {10200, nullptr, "SendFriendRequestForApplication"}, 30 {10200, nullptr, "SendFriendRequestForApplication"},
29 {10211, nullptr, "AddFacedFriendRequestForApplication"}, 31 {10211, nullptr, "AddFacedFriendRequestForApplication"},
30 {10400, &IFriendService::GetBlockedUserListIds, "GetBlockedUserListIds"}, 32 {10400, &IFriendService::GetBlockedUserListIds, "GetBlockedUserListIds"},
33 {10420, nullptr, "Unknown10420"},
34 {10421, nullptr, "Unknown10421"},
31 {10500, nullptr, "GetProfileList"}, 35 {10500, nullptr, "GetProfileList"},
32 {10600, nullptr, "DeclareOpenOnlinePlaySession"}, 36 {10600, nullptr, "DeclareOpenOnlinePlaySession"},
33 {10601, &IFriendService::DeclareCloseOnlinePlaySession, "DeclareCloseOnlinePlaySession"}, 37 {10601, &IFriendService::DeclareCloseOnlinePlaySession, "DeclareCloseOnlinePlaySession"},
@@ -97,6 +101,8 @@ public:
97 {30900, nullptr, "SendFriendInvitation"}, 101 {30900, nullptr, "SendFriendInvitation"},
98 {30910, nullptr, "ReadFriendInvitation"}, 102 {30910, nullptr, "ReadFriendInvitation"},
99 {30911, nullptr, "ReadAllFriendInvitations"}, 103 {30911, nullptr, "ReadAllFriendInvitations"},
104 {40100, nullptr, "Unknown40100"},
105 {40400, nullptr, "Unknown40400"},
100 {49900, nullptr, "DeleteNetworkServiceAccountCache"}, 106 {49900, nullptr, "DeleteNetworkServiceAccountCache"},
101 }; 107 };
102 // clang-format on 108 // clang-format on
diff --git a/src/core/hle/service/grc/grc.cpp b/src/core/hle/service/grc/grc.cpp
index 24910ac6c..401e0b208 100644
--- a/src/core/hle/service/grc/grc.cpp
+++ b/src/core/hle/service/grc/grc.cpp
@@ -17,6 +17,9 @@ public:
17 static const FunctionInfo functions[] = { 17 static const FunctionInfo functions[] = {
18 {1, nullptr, "OpenContinuousRecorder"}, 18 {1, nullptr, "OpenContinuousRecorder"},
19 {2, nullptr, "OpenGameMovieTrimmer"}, 19 {2, nullptr, "OpenGameMovieTrimmer"},
20 {3, nullptr, "OpenOffscreenRecorder"},
21 {101, nullptr, "CreateMovieMaker"},
22 {9903, nullptr, "SetOffscreenRecordingMarker"}
20 }; 23 };
21 // clang-format on 24 // clang-format on
22 25
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index c55d900e2..6fbee7efa 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -566,6 +566,14 @@ void Controller_NPad::DisconnectNPad(u32 npad_id) {
566 connected_controllers[NPadIdToIndex(npad_id)].is_connected = false; 566 connected_controllers[NPadIdToIndex(npad_id)].is_connected = false;
567} 567}
568 568
569void Controller_NPad::SetGyroscopeZeroDriftMode(GyroscopeZeroDriftMode drift_mode) {
570 gyroscope_zero_drift_mode = drift_mode;
571}
572
573Controller_NPad::GyroscopeZeroDriftMode Controller_NPad::GetGyroscopeZeroDriftMode() const {
574 return gyroscope_zero_drift_mode;
575}
576
569void Controller_NPad::StartLRAssignmentMode() { 577void Controller_NPad::StartLRAssignmentMode() {
570 // Nothing internally is used for lr assignment mode. Since we have the ability to set the 578 // Nothing internally is used for lr assignment mode. Since we have the ability to set the
571 // controller types from boot, it doesn't really matter about showing a selection screen 579 // controller types from boot, it doesn't really matter about showing a selection screen
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h
index 931f03430..5d4c58a43 100644
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -58,6 +58,12 @@ public:
58 }; 58 };
59 static_assert(sizeof(Vibration) == 0x10, "Vibration is an invalid size"); 59 static_assert(sizeof(Vibration) == 0x10, "Vibration is an invalid size");
60 60
61 enum class GyroscopeZeroDriftMode : u32 {
62 Loose = 0,
63 Standard = 1,
64 Tight = 2,
65 };
66
61 enum class NpadHoldType : u64 { 67 enum class NpadHoldType : u64 {
62 Vertical = 0, 68 Vertical = 0,
63 Horizontal = 1, 69 Horizontal = 1,
@@ -117,6 +123,8 @@ public:
117 123
118 void ConnectNPad(u32 npad_id); 124 void ConnectNPad(u32 npad_id);
119 void DisconnectNPad(u32 npad_id); 125 void DisconnectNPad(u32 npad_id);
126 void SetGyroscopeZeroDriftMode(GyroscopeZeroDriftMode drift_mode);
127 GyroscopeZeroDriftMode GetGyroscopeZeroDriftMode() const;
120 LedPattern GetLedPattern(u32 npad_id); 128 LedPattern GetLedPattern(u32 npad_id);
121 void SetVibrationEnabled(bool can_vibrate); 129 void SetVibrationEnabled(bool can_vibrate);
122 bool IsVibrationEnabled() const; 130 bool IsVibrationEnabled() const;
@@ -324,8 +332,8 @@ private:
324 std::array<Kernel::EventPair, 10> styleset_changed_events; 332 std::array<Kernel::EventPair, 10> styleset_changed_events;
325 Vibration last_processed_vibration{}; 333 Vibration last_processed_vibration{};
326 std::array<ControllerHolder, 10> connected_controllers{}; 334 std::array<ControllerHolder, 10> connected_controllers{};
335 GyroscopeZeroDriftMode gyroscope_zero_drift_mode{GyroscopeZeroDriftMode::Standard};
327 bool can_controllers_vibrate{true}; 336 bool can_controllers_vibrate{true};
328
329 std::array<ControllerPad, 10> npad_pad_states{}; 337 std::array<ControllerPad, 10> npad_pad_states{};
330 bool is_in_lr_assignment_mode{false}; 338 bool is_in_lr_assignment_mode{false};
331 Core::System& system; 339 Core::System& system;
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index c84cb1483..57d5edea7 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -161,7 +161,7 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
161 {40, nullptr, "AcquireXpadIdEventHandle"}, 161 {40, nullptr, "AcquireXpadIdEventHandle"},
162 {41, nullptr, "ReleaseXpadIdEventHandle"}, 162 {41, nullptr, "ReleaseXpadIdEventHandle"},
163 {51, &Hid::ActivateXpad, "ActivateXpad"}, 163 {51, &Hid::ActivateXpad, "ActivateXpad"},
164 {55, nullptr, "GetXpadIds"}, 164 {55, &Hid::GetXpadIDs, "GetXpadIds"},
165 {56, nullptr, "ActivateJoyXpad"}, 165 {56, nullptr, "ActivateJoyXpad"},
166 {58, nullptr, "GetJoyXpadLifoHandle"}, 166 {58, nullptr, "GetJoyXpadLifoHandle"},
167 {59, nullptr, "GetJoyXpadIds"}, 167 {59, nullptr, "GetJoyXpadIds"},
@@ -185,8 +185,8 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
185 {77, nullptr, "GetAccelerometerPlayMode"}, 185 {77, nullptr, "GetAccelerometerPlayMode"},
186 {78, nullptr, "ResetAccelerometerPlayMode"}, 186 {78, nullptr, "ResetAccelerometerPlayMode"},
187 {79, &Hid::SetGyroscopeZeroDriftMode, "SetGyroscopeZeroDriftMode"}, 187 {79, &Hid::SetGyroscopeZeroDriftMode, "SetGyroscopeZeroDriftMode"},
188 {80, nullptr, "GetGyroscopeZeroDriftMode"}, 188 {80, &Hid::GetGyroscopeZeroDriftMode, "GetGyroscopeZeroDriftMode"},
189 {81, nullptr, "ResetGyroscopeZeroDriftMode"}, 189 {81, &Hid::ResetGyroscopeZeroDriftMode, "ResetGyroscopeZeroDriftMode"},
190 {82, &Hid::IsSixAxisSensorAtRest, "IsSixAxisSensorAtRest"}, 190 {82, &Hid::IsSixAxisSensorAtRest, "IsSixAxisSensorAtRest"},
191 {83, nullptr, "IsFirmwareUpdateAvailableForSixAxisSensor"}, 191 {83, nullptr, "IsFirmwareUpdateAvailableForSixAxisSensor"},
192 {91, &Hid::ActivateGesture, "ActivateGesture"}, 192 {91, &Hid::ActivateGesture, "ActivateGesture"},
@@ -230,15 +230,15 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
230 {211, nullptr, "IsVibrationDeviceMounted"}, 230 {211, nullptr, "IsVibrationDeviceMounted"},
231 {300, &Hid::ActivateConsoleSixAxisSensor, "ActivateConsoleSixAxisSensor"}, 231 {300, &Hid::ActivateConsoleSixAxisSensor, "ActivateConsoleSixAxisSensor"},
232 {301, &Hid::StartConsoleSixAxisSensor, "StartConsoleSixAxisSensor"}, 232 {301, &Hid::StartConsoleSixAxisSensor, "StartConsoleSixAxisSensor"},
233 {302, nullptr, "StopConsoleSixAxisSensor"}, 233 {302, &Hid::StopConsoleSixAxisSensor, "StopConsoleSixAxisSensor"},
234 {303, nullptr, "ActivateSevenSixAxisSensor"}, 234 {303, &Hid::ActivateSevenSixAxisSensor, "ActivateSevenSixAxisSensor"},
235 {304, nullptr, "StartSevenSixAxisSensor"}, 235 {304, &Hid::StartSevenSixAxisSensor, "StartSevenSixAxisSensor"},
236 {305, &Hid::StopSevenSixAxisSensor, "StopSevenSixAxisSensor"}, 236 {305, &Hid::StopSevenSixAxisSensor, "StopSevenSixAxisSensor"},
237 {306, &Hid::InitializeSevenSixAxisSensor, "InitializeSevenSixAxisSensor"}, 237 {306, &Hid::InitializeSevenSixAxisSensor, "InitializeSevenSixAxisSensor"},
238 {307, nullptr, "FinalizeSevenSixAxisSensor"}, 238 {307, &Hid::FinalizeSevenSixAxisSensor, "FinalizeSevenSixAxisSensor"},
239 {308, nullptr, "SetSevenSixAxisSensorFusionStrength"}, 239 {308, nullptr, "SetSevenSixAxisSensorFusionStrength"},
240 {309, nullptr, "GetSevenSixAxisSensorFusionStrength"}, 240 {309, nullptr, "GetSevenSixAxisSensorFusionStrength"},
241 {310, nullptr, "ResetSevenSixAxisSensorTimestamp"}, 241 {310, &Hid::ResetSevenSixAxisSensorTimestamp, "ResetSevenSixAxisSensorTimestamp"},
242 {400, nullptr, "IsUsbFullKeyControllerEnabled"}, 242 {400, nullptr, "IsUsbFullKeyControllerEnabled"},
243 {401, nullptr, "EnableUsbFullKeyController"}, 243 {401, nullptr, "EnableUsbFullKeyController"},
244 {402, nullptr, "IsUsbFullKeyControllerConnected"}, 244 {402, nullptr, "IsUsbFullKeyControllerConnected"},
@@ -319,6 +319,17 @@ void Hid::ActivateXpad(Kernel::HLERequestContext& ctx) {
319 rb.Push(RESULT_SUCCESS); 319 rb.Push(RESULT_SUCCESS);
320} 320}
321 321
322void Hid::GetXpadIDs(Kernel::HLERequestContext& ctx) {
323 IPC::RequestParser rp{ctx};
324 const auto applet_resource_user_id{rp.Pop<u64>()};
325
326 LOG_DEBUG(Service_HID, "(STUBBED) called, applet_resource_user_id={}", applet_resource_user_id);
327
328 IPC::ResponseBuilder rb{ctx, 3};
329 rb.Push(RESULT_SUCCESS);
330 rb.Push(0);
331}
332
322void Hid::ActivateDebugPad(Kernel::HLERequestContext& ctx) { 333void Hid::ActivateDebugPad(Kernel::HLERequestContext& ctx) {
323 IPC::RequestParser rp{ctx}; 334 IPC::RequestParser rp{ctx};
324 const auto applet_resource_user_id{rp.Pop<u64>()}; 335 const auto applet_resource_user_id{rp.Pop<u64>()};
@@ -363,6 +374,15 @@ void Hid::ActivateKeyboard(Kernel::HLERequestContext& ctx) {
363 rb.Push(RESULT_SUCCESS); 374 rb.Push(RESULT_SUCCESS);
364} 375}
365 376
377void Hid::SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx) {
378 IPC::RequestParser rp{ctx};
379 const auto flags{rp.Pop<u32>()};
380 LOG_WARNING(Service_HID, "(STUBBED) called. flags={}", flags);
381
382 IPC::ResponseBuilder rb{ctx, 2};
383 rb.Push(RESULT_SUCCESS);
384}
385
366void Hid::ActivateGesture(Kernel::HLERequestContext& ctx) { 386void Hid::ActivateGesture(Kernel::HLERequestContext& ctx) {
367 IPC::RequestParser rp{ctx}; 387 IPC::RequestParser rp{ctx};
368 const auto unknown{rp.Pop<u32>()}; 388 const auto unknown{rp.Pop<u32>()};
@@ -402,15 +422,59 @@ void Hid::StartSixAxisSensor(Kernel::HLERequestContext& ctx) {
402 rb.Push(RESULT_SUCCESS); 422 rb.Push(RESULT_SUCCESS);
403} 423}
404 424
425void Hid::StopSixAxisSensor(Kernel::HLERequestContext& ctx) {
426 IPC::RequestParser rp{ctx};
427 const auto handle{rp.Pop<u32>()};
428 const auto applet_resource_user_id{rp.Pop<u64>()};
429
430 LOG_WARNING(Service_HID, "(STUBBED) called, handle={}, applet_resource_user_id={}", handle,
431 applet_resource_user_id);
432
433 IPC::ResponseBuilder rb{ctx, 2};
434 rb.Push(RESULT_SUCCESS);
435}
436
405void Hid::SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) { 437void Hid::SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
406 IPC::RequestParser rp{ctx}; 438 IPC::RequestParser rp{ctx};
407 const auto handle{rp.Pop<u32>()}; 439 const auto handle{rp.Pop<u32>()};
408 const auto drift_mode{rp.Pop<u32>()}; 440 const auto drift_mode{rp.Pop<u32>()};
409 const auto applet_resource_user_id{rp.Pop<u64>()}; 441 const auto applet_resource_user_id{rp.Pop<u64>()};
410 442
411 LOG_WARNING(Service_HID, 443 applet_resource->GetController<Controller_NPad>(HidController::NPad)
412 "(STUBBED) called, handle={}, drift_mode={}, applet_resource_user_id={}", handle, 444 .SetGyroscopeZeroDriftMode(Controller_NPad::GyroscopeZeroDriftMode{drift_mode});
413 drift_mode, applet_resource_user_id); 445
446 LOG_DEBUG(Service_HID, "called, handle={}, drift_mode={}, applet_resource_user_id={}", handle,
447 drift_mode, applet_resource_user_id);
448
449 IPC::ResponseBuilder rb{ctx, 2};
450 rb.Push(RESULT_SUCCESS);
451}
452
453void Hid::GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
454 IPC::RequestParser rp{ctx};
455 const auto handle{rp.Pop<u32>()};
456 const auto applet_resource_user_id{rp.Pop<u64>()};
457
458 LOG_DEBUG(Service_HID, "called, handle={}, applet_resource_user_id={}", handle,
459 applet_resource_user_id);
460
461 IPC::ResponseBuilder rb{ctx, 3};
462 rb.Push(RESULT_SUCCESS);
463 rb.Push<u32>(
464 static_cast<u32>(applet_resource->GetController<Controller_NPad>(HidController::NPad)
465 .GetGyroscopeZeroDriftMode()));
466}
467
468void Hid::ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx) {
469 IPC::RequestParser rp{ctx};
470 const auto handle{rp.Pop<u32>()};
471 const auto applet_resource_user_id{rp.Pop<u64>()};
472
473 applet_resource->GetController<Controller_NPad>(HidController::NPad)
474 .SetGyroscopeZeroDriftMode(Controller_NPad::GyroscopeZeroDriftMode::Standard);
475
476 LOG_DEBUG(Service_HID, "called, handle={}, applet_resource_user_id={}", handle,
477 applet_resource_user_id);
414 478
415 IPC::ResponseBuilder rb{ctx, 2}; 479 IPC::ResponseBuilder rb{ctx, 2};
416 rb.Push(RESULT_SUCCESS); 480 rb.Push(RESULT_SUCCESS);
@@ -821,33 +885,35 @@ void Hid::StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) {
821 rb.Push(RESULT_SUCCESS); 885 rb.Push(RESULT_SUCCESS);
822} 886}
823 887
824void Hid::StopSixAxisSensor(Kernel::HLERequestContext& ctx) { 888void Hid::StopConsoleSixAxisSensor(Kernel::HLERequestContext& ctx) {
825 IPC::RequestParser rp{ctx}; 889 IPC::RequestParser rp{ctx};
826 const auto handle{rp.Pop<u32>()}; 890 const auto handle{rp.Pop<u32>()};
891 const auto applet_resource_user_id{rp.Pop<u64>()};
827 892
828 LOG_WARNING(Service_HID, "(STUBBED) called, handle={}", handle); 893 LOG_WARNING(Service_HID, "(STUBBED) called, handle={}, applet_resource_user_id={}", handle,
894 applet_resource_user_id);
829 895
830 IPC::ResponseBuilder rb{ctx, 2}; 896 IPC::ResponseBuilder rb{ctx, 2};
831 rb.Push(RESULT_SUCCESS); 897 rb.Push(RESULT_SUCCESS);
832} 898}
833 899
834void Hid::SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx) { 900void Hid::ActivateSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
835 IPC::RequestParser rp{ctx}; 901 IPC::RequestParser rp{ctx};
836 const auto applet_resource_user_id{rp.Pop<u64>()}; 902 const auto applet_resource_user_id{rp.Pop<u64>()};
837 const auto unknown{rp.Pop<u32>()};
838 903
839 LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}, unknown={}", 904 LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
840 applet_resource_user_id, unknown); 905 applet_resource_user_id);
841 906
842 IPC::ResponseBuilder rb{ctx, 2}; 907 IPC::ResponseBuilder rb{ctx, 2};
843 rb.Push(RESULT_SUCCESS); 908 rb.Push(RESULT_SUCCESS);
844} 909}
845 910
846void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) { 911void Hid::StartSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
847 IPC::RequestParser rp{ctx}; 912 IPC::RequestParser rp{ctx};
848 const auto unknown{rp.Pop<u32>()}; 913 const auto applet_resource_user_id{rp.Pop<u64>()};
849 914
850 LOG_WARNING(Service_HID, "(STUBBED) called, unknown={}", unknown); 915 LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
916 applet_resource_user_id);
851 917
852 IPC::ResponseBuilder rb{ctx, 2}; 918 IPC::ResponseBuilder rb{ctx, 2};
853 rb.Push(RESULT_SUCCESS); 919 rb.Push(RESULT_SUCCESS);
@@ -871,10 +937,46 @@ void Hid::InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
871 rb.Push(RESULT_SUCCESS); 937 rb.Push(RESULT_SUCCESS);
872} 938}
873 939
874void Hid::SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx) { 940void Hid::FinalizeSevenSixAxisSensor(Kernel::HLERequestContext& ctx) {
875 IPC::RequestParser rp{ctx}; 941 IPC::RequestParser rp{ctx};
876 const auto flags{rp.Pop<u32>()}; 942 const auto applet_resource_user_id{rp.Pop<u64>()};
877 LOG_WARNING(Service_HID, "(STUBBED) called. flags={}", flags); 943
944 LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
945 applet_resource_user_id);
946
947 IPC::ResponseBuilder rb{ctx, 2};
948 rb.Push(RESULT_SUCCESS);
949}
950
951void Hid::ResetSevenSixAxisSensorTimestamp(Kernel::HLERequestContext& ctx) {
952 IPC::RequestParser rp{ctx};
953 const auto applet_resource_user_id{rp.Pop<u64>()};
954
955 LOG_WARNING(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
956 applet_resource_user_id);
957
958 IPC::ResponseBuilder rb{ctx, 2};
959 rb.Push(RESULT_SUCCESS);
960}
961
962void Hid::SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx) {
963 IPC::RequestParser rp{ctx};
964 const auto applet_resource_user_id{rp.Pop<u64>()};
965 const auto is_palma_all_connectable{rp.Pop<bool>()};
966
967 LOG_WARNING(Service_HID,
968 "(STUBBED) called, applet_resource_user_id={}, is_palma_all_connectable={}",
969 applet_resource_user_id, is_palma_all_connectable);
970
971 IPC::ResponseBuilder rb{ctx, 2};
972 rb.Push(RESULT_SUCCESS);
973}
974
975void Hid::SetPalmaBoostMode(Kernel::HLERequestContext& ctx) {
976 IPC::RequestParser rp{ctx};
977 const auto palma_boost_mode{rp.Pop<bool>()};
978
979 LOG_WARNING(Service_HID, "(STUBBED) called, palma_boost_mode={}", palma_boost_mode);
878 980
879 IPC::ResponseBuilder rb{ctx, 2}; 981 IPC::ResponseBuilder rb{ctx, 2};
880 rb.Push(RESULT_SUCCESS); 982 rb.Push(RESULT_SUCCESS);
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index c8ed4ad8b..6fb048360 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -86,14 +86,19 @@ public:
86private: 86private:
87 void CreateAppletResource(Kernel::HLERequestContext& ctx); 87 void CreateAppletResource(Kernel::HLERequestContext& ctx);
88 void ActivateXpad(Kernel::HLERequestContext& ctx); 88 void ActivateXpad(Kernel::HLERequestContext& ctx);
89 void GetXpadIDs(Kernel::HLERequestContext& ctx);
89 void ActivateDebugPad(Kernel::HLERequestContext& ctx); 90 void ActivateDebugPad(Kernel::HLERequestContext& ctx);
90 void ActivateTouchScreen(Kernel::HLERequestContext& ctx); 91 void ActivateTouchScreen(Kernel::HLERequestContext& ctx);
91 void ActivateMouse(Kernel::HLERequestContext& ctx); 92 void ActivateMouse(Kernel::HLERequestContext& ctx);
92 void ActivateKeyboard(Kernel::HLERequestContext& ctx); 93 void ActivateKeyboard(Kernel::HLERequestContext& ctx);
94 void SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx);
93 void ActivateGesture(Kernel::HLERequestContext& ctx); 95 void ActivateGesture(Kernel::HLERequestContext& ctx);
94 void ActivateNpadWithRevision(Kernel::HLERequestContext& ctx); 96 void ActivateNpadWithRevision(Kernel::HLERequestContext& ctx);
95 void StartSixAxisSensor(Kernel::HLERequestContext& ctx); 97 void StartSixAxisSensor(Kernel::HLERequestContext& ctx);
98 void StopSixAxisSensor(Kernel::HLERequestContext& ctx);
96 void SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx); 99 void SetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx);
100 void GetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx);
101 void ResetGyroscopeZeroDriftMode(Kernel::HLERequestContext& ctx);
97 void IsSixAxisSensorAtRest(Kernel::HLERequestContext& ctx); 102 void IsSixAxisSensorAtRest(Kernel::HLERequestContext& ctx);
98 void SetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx); 103 void SetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx);
99 void GetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx); 104 void GetSupportedNpadStyleSet(Kernel::HLERequestContext& ctx);
@@ -125,12 +130,15 @@ private:
125 void IsVibrationPermitted(Kernel::HLERequestContext& ctx); 130 void IsVibrationPermitted(Kernel::HLERequestContext& ctx);
126 void ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx); 131 void ActivateConsoleSixAxisSensor(Kernel::HLERequestContext& ctx);
127 void StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx); 132 void StartConsoleSixAxisSensor(Kernel::HLERequestContext& ctx);
128 void StopSixAxisSensor(Kernel::HLERequestContext& ctx); 133 void StopConsoleSixAxisSensor(Kernel::HLERequestContext& ctx);
129 void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx); 134 void ActivateSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
130 void SetPalmaBoostMode(Kernel::HLERequestContext& ctx); 135 void StartSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
131 void StopSevenSixAxisSensor(Kernel::HLERequestContext& ctx); 136 void StopSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
132 void InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx); 137 void InitializeSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
133 void SendKeyboardLockKeyEvent(Kernel::HLERequestContext& ctx); 138 void FinalizeSevenSixAxisSensor(Kernel::HLERequestContext& ctx);
139 void ResetSevenSixAxisSensorTimestamp(Kernel::HLERequestContext& ctx);
140 void SetIsPalmaAllConnectable(Kernel::HLERequestContext& ctx);
141 void SetPalmaBoostMode(Kernel::HLERequestContext& ctx);
134 142
135 std::shared_ptr<IAppletResource> applet_resource; 143 std::shared_ptr<IAppletResource> applet_resource;
136 Core::System& system; 144 Core::System& system;
diff --git a/src/core/hle/service/lbl/lbl.cpp b/src/core/hle/service/lbl/lbl.cpp
index e8f9f2d29..17350b403 100644
--- a/src/core/hle/service/lbl/lbl.cpp
+++ b/src/core/hle/service/lbl/lbl.cpp
@@ -47,6 +47,7 @@ public:
47 {26, &LBL::EnableVrMode, "EnableVrMode"}, 47 {26, &LBL::EnableVrMode, "EnableVrMode"},
48 {27, &LBL::DisableVrMode, "DisableVrMode"}, 48 {27, &LBL::DisableVrMode, "DisableVrMode"},
49 {28, &LBL::IsVrModeEnabled, "IsVrModeEnabled"}, 49 {28, &LBL::IsVrModeEnabled, "IsVrModeEnabled"},
50 {29, nullptr, "IsAutoBrightnessControlSupported"},
50 }; 51 };
51 // clang-format on 52 // clang-format on
52 53
diff --git a/src/core/hle/service/ldn/ldn.cpp b/src/core/hle/service/ldn/ldn.cpp
index 92adde6d4..49972cd69 100644
--- a/src/core/hle/service/ldn/ldn.cpp
+++ b/src/core/hle/service/ldn/ldn.cpp
@@ -69,6 +69,7 @@ public:
69 {101, nullptr, "GetNetworkInfoLatestUpdate"}, 69 {101, nullptr, "GetNetworkInfoLatestUpdate"},
70 {102, nullptr, "Scan"}, 70 {102, nullptr, "Scan"},
71 {103, nullptr, "ScanPrivate"}, 71 {103, nullptr, "ScanPrivate"},
72 {104, nullptr, "SetWirelessControllerRestriction"},
72 {200, nullptr, "OpenAccessPoint"}, 73 {200, nullptr, "OpenAccessPoint"},
73 {201, nullptr, "CloseAccessPoint"}, 74 {201, nullptr, "CloseAccessPoint"},
74 {202, nullptr, "CreateNetwork"}, 75 {202, nullptr, "CreateNetwork"},
diff --git a/src/core/hle/service/lm/manager.cpp b/src/core/hle/service/lm/manager.cpp
index b67081b86..3ee2374e7 100644
--- a/src/core/hle/service/lm/manager.cpp
+++ b/src/core/hle/service/lm/manager.cpp
@@ -86,7 +86,8 @@ std::string FormatField(Field type, const std::vector<u8>& data) {
86 return Common::StringFromFixedZeroTerminatedBuffer( 86 return Common::StringFromFixedZeroTerminatedBuffer(
87 reinterpret_cast<const char*>(data.data()), data.size()); 87 reinterpret_cast<const char*>(data.data()), data.size());
88 default: 88 default:
89 UNIMPLEMENTED(); 89 UNIMPLEMENTED_MSG("Unimplemented field type={}", type);
90 return "";
90 } 91 }
91} 92}
92 93
diff --git a/src/core/hle/service/mig/mig.cpp b/src/core/hle/service/mig/mig.cpp
index d16367f2c..113a4665c 100644
--- a/src/core/hle/service/mig/mig.cpp
+++ b/src/core/hle/service/mig/mig.cpp
@@ -20,6 +20,12 @@ public:
20 {101, nullptr, "ResumeServer"}, 20 {101, nullptr, "ResumeServer"},
21 {200, nullptr, "CreateClient"}, 21 {200, nullptr, "CreateClient"},
22 {201, nullptr, "ResumeClient"}, 22 {201, nullptr, "ResumeClient"},
23 {1001, nullptr, "Unknown1001"},
24 {1010, nullptr, "Unknown1010"},
25 {1100, nullptr, "Unknown1100"},
26 {1101, nullptr, "Unknown1101"},
27 {1200, nullptr, "Unknown1200"},
28 {1201, nullptr, "Unknown1201"}
23 }; 29 };
24 // clang-format on 30 // clang-format on
25 31
diff --git a/src/core/hle/service/mm/mm_u.cpp b/src/core/hle/service/mm/mm_u.cpp
index def63dc8a..25c24e537 100644
--- a/src/core/hle/service/mm/mm_u.cpp
+++ b/src/core/hle/service/mm/mm_u.cpp
@@ -14,14 +14,14 @@ public:
14 explicit MM_U() : ServiceFramework{"mm:u"} { 14 explicit MM_U() : ServiceFramework{"mm:u"} {
15 // clang-format off 15 // clang-format off
16 static const FunctionInfo functions[] = { 16 static const FunctionInfo functions[] = {
17 {0, &MM_U::Initialize, "Initialize"}, 17 {0, &MM_U::InitializeOld, "InitializeOld"},
18 {1, &MM_U::Finalize, "Finalize"}, 18 {1, &MM_U::FinalizeOld, "FinalizeOld"},
19 {2, &MM_U::SetAndWait, "SetAndWait"}, 19 {2, &MM_U::SetAndWaitOld, "SetAndWaitOld"},
20 {3, &MM_U::Get, "Get"}, 20 {3, &MM_U::GetOld, "GetOld"},
21 {4, &MM_U::InitializeWithId, "InitializeWithId"}, 21 {4, &MM_U::Initialize, "Initialize"},
22 {5, &MM_U::FinalizeWithId, "FinalizeWithId"}, 22 {5, &MM_U::Finalize, "Finalize"},
23 {6, &MM_U::SetAndWaitWithId, "SetAndWaitWithId"}, 23 {6, &MM_U::SetAndWait, "SetAndWait"},
24 {7, &MM_U::GetWithId, "GetWithId"}, 24 {7, &MM_U::Get, "Get"},
25 }; 25 };
26 // clang-format on 26 // clang-format on
27 27
@@ -29,21 +29,21 @@ public:
29 } 29 }
30 30
31private: 31private:
32 void Initialize(Kernel::HLERequestContext& ctx) { 32 void InitializeOld(Kernel::HLERequestContext& ctx) {
33 LOG_WARNING(Service_MM, "(STUBBED) called"); 33 LOG_WARNING(Service_MM, "(STUBBED) called");
34 34
35 IPC::ResponseBuilder rb{ctx, 2}; 35 IPC::ResponseBuilder rb{ctx, 2};
36 rb.Push(RESULT_SUCCESS); 36 rb.Push(RESULT_SUCCESS);
37 } 37 }
38 38
39 void Finalize(Kernel::HLERequestContext& ctx) { 39 void FinalizeOld(Kernel::HLERequestContext& ctx) {
40 LOG_WARNING(Service_MM, "(STUBBED) called"); 40 LOG_WARNING(Service_MM, "(STUBBED) called");
41 41
42 IPC::ResponseBuilder rb{ctx, 2}; 42 IPC::ResponseBuilder rb{ctx, 2};
43 rb.Push(RESULT_SUCCESS); 43 rb.Push(RESULT_SUCCESS);
44 } 44 }
45 45
46 void SetAndWait(Kernel::HLERequestContext& ctx) { 46 void SetAndWaitOld(Kernel::HLERequestContext& ctx) {
47 IPC::RequestParser rp{ctx}; 47 IPC::RequestParser rp{ctx};
48 min = rp.Pop<u32>(); 48 min = rp.Pop<u32>();
49 max = rp.Pop<u32>(); 49 max = rp.Pop<u32>();
@@ -54,7 +54,7 @@ private:
54 rb.Push(RESULT_SUCCESS); 54 rb.Push(RESULT_SUCCESS);
55 } 55 }
56 56
57 void Get(Kernel::HLERequestContext& ctx) { 57 void GetOld(Kernel::HLERequestContext& ctx) {
58 LOG_WARNING(Service_MM, "(STUBBED) called"); 58 LOG_WARNING(Service_MM, "(STUBBED) called");
59 59
60 IPC::ResponseBuilder rb{ctx, 3}; 60 IPC::ResponseBuilder rb{ctx, 3};
@@ -62,7 +62,7 @@ private:
62 rb.Push(current); 62 rb.Push(current);
63 } 63 }
64 64
65 void InitializeWithId(Kernel::HLERequestContext& ctx) { 65 void Initialize(Kernel::HLERequestContext& ctx) {
66 LOG_WARNING(Service_MM, "(STUBBED) called"); 66 LOG_WARNING(Service_MM, "(STUBBED) called");
67 67
68 IPC::ResponseBuilder rb{ctx, 3}; 68 IPC::ResponseBuilder rb{ctx, 3};
@@ -70,14 +70,14 @@ private:
70 rb.Push<u32>(id); // Any non zero value 70 rb.Push<u32>(id); // Any non zero value
71 } 71 }
72 72
73 void FinalizeWithId(Kernel::HLERequestContext& ctx) { 73 void Finalize(Kernel::HLERequestContext& ctx) {
74 LOG_WARNING(Service_MM, "(STUBBED) called"); 74 LOG_WARNING(Service_MM, "(STUBBED) called");
75 75
76 IPC::ResponseBuilder rb{ctx, 2}; 76 IPC::ResponseBuilder rb{ctx, 2};
77 rb.Push(RESULT_SUCCESS); 77 rb.Push(RESULT_SUCCESS);
78 } 78 }
79 79
80 void SetAndWaitWithId(Kernel::HLERequestContext& ctx) { 80 void SetAndWait(Kernel::HLERequestContext& ctx) {
81 IPC::RequestParser rp{ctx}; 81 IPC::RequestParser rp{ctx};
82 u32 input_id = rp.Pop<u32>(); 82 u32 input_id = rp.Pop<u32>();
83 min = rp.Pop<u32>(); 83 min = rp.Pop<u32>();
@@ -90,7 +90,7 @@ private:
90 rb.Push(RESULT_SUCCESS); 90 rb.Push(RESULT_SUCCESS);
91 } 91 }
92 92
93 void GetWithId(Kernel::HLERequestContext& ctx) { 93 void Get(Kernel::HLERequestContext& ctx) {
94 LOG_WARNING(Service_MM, "(STUBBED) called"); 94 LOG_WARNING(Service_MM, "(STUBBED) called");
95 95
96 IPC::ResponseBuilder rb{ctx, 3}; 96 IPC::ResponseBuilder rb{ctx, 3};
diff --git a/src/core/hle/service/ncm/ncm.cpp b/src/core/hle/service/ncm/ncm.cpp
index ec9aae04a..e38dea1f4 100644
--- a/src/core/hle/service/ncm/ncm.cpp
+++ b/src/core/hle/service/ncm/ncm.cpp
@@ -28,16 +28,16 @@ public:
28 {7, nullptr, "ResolveApplicationLegalInformationPath"}, 28 {7, nullptr, "ResolveApplicationLegalInformationPath"},
29 {8, nullptr, "RedirectApplicationLegalInformationPath"}, 29 {8, nullptr, "RedirectApplicationLegalInformationPath"},
30 {9, nullptr, "Refresh"}, 30 {9, nullptr, "Refresh"},
31 {10, nullptr, "RedirectProgramPath2"}, 31 {10, nullptr, "RedirectApplicationProgramPath"},
32 {11, nullptr, "Refresh2"}, 32 {11, nullptr, "ClearApplicationRedirection"},
33 {12, nullptr, "DeleteProgramPath"}, 33 {12, nullptr, "EraseProgramRedirection"},
34 {13, nullptr, "DeleteApplicationControlPath"}, 34 {13, nullptr, "EraseApplicationControlRedirection"},
35 {14, nullptr, "DeleteApplicationHtmlDocumentPath"}, 35 {14, nullptr, "EraseApplicationHtmlDocumentRedirection"},
36 {15, nullptr, "DeleteApplicationLegalInformationPath"}, 36 {15, nullptr, "EraseApplicationLegalInformationRedirection"},
37 {16, nullptr, ""}, 37 {16, nullptr, "ResolveProgramPathForDebug"},
38 {17, nullptr, ""}, 38 {17, nullptr, "RedirectProgramPathForDebug"},
39 {18, nullptr, ""}, 39 {18, nullptr, "RedirectApplicationProgramPathForDebug"},
40 {19, nullptr, ""}, 40 {19, nullptr, "EraseProgramRedirectionForDebug"},
41 }; 41 };
42 // clang-format on 42 // clang-format on
43 43
diff --git a/src/core/hle/service/nfc/nfc.cpp b/src/core/hle/service/nfc/nfc.cpp
index b7b34ce7e..780ea30fe 100644
--- a/src/core/hle/service/nfc/nfc.cpp
+++ b/src/core/hle/service/nfc/nfc.cpp
@@ -198,9 +198,9 @@ public:
198 static const FunctionInfo functions[] = { 198 static const FunctionInfo functions[] = {
199 {0, nullptr, "Initialize"}, 199 {0, nullptr, "Initialize"},
200 {1, nullptr, "Finalize"}, 200 {1, nullptr, "Finalize"},
201 {2, nullptr, "GetState"}, 201 {2, nullptr, "GetStateOld"},
202 {3, nullptr, "IsNfcEnabled"}, 202 {3, nullptr, "IsNfcEnabledOld"},
203 {100, nullptr, "SetNfcEnabled"}, 203 {100, nullptr, "SetNfcEnabledOld"},
204 {400, nullptr, "InitializeSystem"}, 204 {400, nullptr, "InitializeSystem"},
205 {401, nullptr, "FinalizeSystem"}, 205 {401, nullptr, "FinalizeSystem"},
206 {402, nullptr, "GetState"}, 206 {402, nullptr, "GetState"},
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index cc2192e5c..0d913334e 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -25,7 +25,7 @@ u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input,
25 case IoctlCommand::IocGetCharacteristicsCommand: 25 case IoctlCommand::IocGetCharacteristicsCommand:
26 return GetCharacteristics(input, output, output2, version); 26 return GetCharacteristics(input, output, output2, version);
27 case IoctlCommand::IocGetTPCMasksCommand: 27 case IoctlCommand::IocGetTPCMasksCommand:
28 return GetTPCMasks(input, output); 28 return GetTPCMasks(input, output, output2, version);
29 case IoctlCommand::IocGetActiveSlotMaskCommand: 29 case IoctlCommand::IocGetActiveSlotMaskCommand:
30 return GetActiveSlotMask(input, output); 30 return GetActiveSlotMask(input, output);
31 case IoctlCommand::IocZcullGetCtxSizeCommand: 31 case IoctlCommand::IocZcullGetCtxSizeCommand:
@@ -98,17 +98,22 @@ u32 nvhost_ctrl_gpu::GetCharacteristics(const std::vector<u8>& input, std::vecto
98 return 0; 98 return 0;
99} 99}
100 100
101u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output) { 101u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output,
102 std::vector<u8>& output2, IoctlVersion version) {
102 IoctlGpuGetTpcMasksArgs params{}; 103 IoctlGpuGetTpcMasksArgs params{};
103 std::memcpy(&params, input.data(), input.size()); 104 std::memcpy(&params, input.data(), input.size());
104 LOG_INFO(Service_NVDRV, "called, mask=0x{:X}, mask_buf_addr=0x{:X}", params.mask_buf_size, 105 LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
105 params.mask_buf_addr); 106 if (params.mask_buffer_size != 0) {
106 // TODO(ogniK): Confirm value on hardware 107 params.tcp_mask = 3;
107 if (params.mask_buf_size) 108 }
108 params.tpc_mask_size = 4 * 1; // 4 * num_gpc 109
109 else 110 if (version == IoctlVersion::Version3) {
110 params.tpc_mask_size = 0; 111 std::memcpy(output.data(), input.data(), output.size());
111 std::memcpy(output.data(), &params, sizeof(params)); 112 std::memcpy(output2.data(), &params.tcp_mask, output2.size());
113 } else {
114 std::memcpy(output.data(), &params, output.size());
115 }
116
112 return 0; 117 return 0;
113} 118}
114 119
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
index 07b644ec5..ef60f72ce 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -92,16 +92,11 @@ private:
92 "IoctlCharacteristics is incorrect size"); 92 "IoctlCharacteristics is incorrect size");
93 93
94 struct IoctlGpuGetTpcMasksArgs { 94 struct IoctlGpuGetTpcMasksArgs {
95 /// [in] TPC mask buffer size reserved by userspace. Should be at least 95 u32_le mask_buffer_size{};
96 /// sizeof(__u32) * fls(gpc_mask) to receive TPC mask for each GPC. 96 INSERT_PADDING_WORDS(1);
97 /// [out] full kernel buffer size 97 u64_le mask_buffer_address{};
98 u32_le mask_buf_size; 98 u32_le tcp_mask{};
99 u32_le reserved; 99 INSERT_PADDING_WORDS(1);
100
101 /// [in] pointer to TPC mask buffer. It will receive one 32-bit TPC mask per GPC or 0 if
102 /// GPC is not enabled or not present. This parameter is ignored if mask_buf_size is 0.
103 u64_le mask_buf_addr;
104 u64_le tpc_mask_size; // Nintendo add this?
105 }; 100 };
106 static_assert(sizeof(IoctlGpuGetTpcMasksArgs) == 24, 101 static_assert(sizeof(IoctlGpuGetTpcMasksArgs) == 24,
107 "IoctlGpuGetTpcMasksArgs is incorrect size"); 102 "IoctlGpuGetTpcMasksArgs is incorrect size");
@@ -166,7 +161,8 @@ private:
166 161
167 u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output, 162 u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output,
168 std::vector<u8>& output2, IoctlVersion version); 163 std::vector<u8>& output2, IoctlVersion version);
169 u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output); 164 u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output, std::vector<u8>& output2,
165 IoctlVersion version);
170 u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output); 166 u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output);
171 u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output); 167 u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output);
172 u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output); 168 u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output);
diff --git a/src/core/host_timing.cpp b/src/core/host_timing.cpp
new file mode 100644
index 000000000..2f40de1a1
--- /dev/null
+++ b/src/core/host_timing.cpp
@@ -0,0 +1,206 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "core/host_timing.h"
6
7#include <algorithm>
8#include <mutex>
9#include <string>
10#include <tuple>
11
12#include "common/assert.h"
13#include "core/core_timing_util.h"
14
15namespace Core::HostTiming {
16
17std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback) {
18 return std::make_shared<EventType>(std::move(callback), std::move(name));
19}
20
21struct CoreTiming::Event {
22 u64 time;
23 u64 fifo_order;
24 u64 userdata;
25 std::weak_ptr<EventType> type;
26
27 // Sort by time, unless the times are the same, in which case sort by
28 // the order added to the queue
29 friend bool operator>(const Event& left, const Event& right) {
30 return std::tie(left.time, left.fifo_order) > std::tie(right.time, right.fifo_order);
31 }
32
33 friend bool operator<(const Event& left, const Event& right) {
34 return std::tie(left.time, left.fifo_order) < std::tie(right.time, right.fifo_order);
35 }
36};
37
38CoreTiming::CoreTiming() {
39 clock =
40 Common::CreateBestMatchingClock(Core::Hardware::BASE_CLOCK_RATE, Core::Hardware::CNTFREQ);
41}
42
43CoreTiming::~CoreTiming() = default;
44
45void CoreTiming::ThreadEntry(CoreTiming& instance) {
46 instance.ThreadLoop();
47}
48
49void CoreTiming::Initialize() {
50 event_fifo_id = 0;
51 const auto empty_timed_callback = [](u64, s64) {};
52 ev_lost = CreateEvent("_lost_event", empty_timed_callback);
53 timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this));
54}
55
56void CoreTiming::Shutdown() {
57 paused = true;
58 shutting_down = true;
59 event.Set();
60 timer_thread->join();
61 ClearPendingEvents();
62 timer_thread.reset();
63 has_started = false;
64}
65
66void CoreTiming::Pause(bool is_paused) {
67 paused = is_paused;
68}
69
70void CoreTiming::SyncPause(bool is_paused) {
71 if (is_paused == paused && paused_set == paused) {
72 return;
73 }
74 Pause(is_paused);
75 event.Set();
76 while (paused_set != is_paused)
77 ;
78}
79
80bool CoreTiming::IsRunning() const {
81 return !paused_set;
82}
83
84bool CoreTiming::HasPendingEvents() const {
85 return !(wait_set && event_queue.empty());
86}
87
88void CoreTiming::ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
89 u64 userdata) {
90 basic_lock.lock();
91 const u64 timeout = static_cast<u64>(GetGlobalTimeNs().count() + ns_into_future);
92
93 event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
94
95 std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
96 basic_lock.unlock();
97 event.Set();
98}
99
100void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata) {
101 basic_lock.lock();
102 const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
103 return e.type.lock().get() == event_type.get() && e.userdata == userdata;
104 });
105
106 // Removing random items breaks the invariant so we have to re-establish it.
107 if (itr != event_queue.end()) {
108 event_queue.erase(itr, event_queue.end());
109 std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
110 }
111 basic_lock.unlock();
112}
113
114void CoreTiming::AddTicks(std::size_t core_index, u64 ticks) {
115 ticks_count[core_index] += ticks;
116}
117
118void CoreTiming::ResetTicks(std::size_t core_index) {
119 ticks_count[core_index] = 0;
120}
121
122u64 CoreTiming::GetCPUTicks() const {
123 return clock->GetCPUCycles();
124}
125
126u64 CoreTiming::GetClockTicks() const {
127 return clock->GetClockCycles();
128}
129
130void CoreTiming::ClearPendingEvents() {
131 event_queue.clear();
132}
133
134void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
135 basic_lock.lock();
136
137 const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
138 return e.type.lock().get() == event_type.get();
139 });
140
141 // Removing random items breaks the invariant so we have to re-establish it.
142 if (itr != event_queue.end()) {
143 event_queue.erase(itr, event_queue.end());
144 std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
145 }
146 basic_lock.unlock();
147}
148
149std::optional<u64> CoreTiming::Advance() {
150 advance_lock.lock();
151 basic_lock.lock();
152 global_timer = GetGlobalTimeNs().count();
153
154 while (!event_queue.empty() && event_queue.front().time <= global_timer) {
155 Event evt = std::move(event_queue.front());
156 std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
157 event_queue.pop_back();
158 basic_lock.unlock();
159
160 if (auto event_type{evt.type.lock()}) {
161 event_type->callback(evt.userdata, global_timer - evt.time);
162 }
163
164 basic_lock.lock();
165 }
166
167 if (!event_queue.empty()) {
168 const u64 next_time = event_queue.front().time - global_timer;
169 basic_lock.unlock();
170 advance_lock.unlock();
171 return next_time;
172 } else {
173 basic_lock.unlock();
174 advance_lock.unlock();
175 return std::nullopt;
176 }
177}
178
179void CoreTiming::ThreadLoop() {
180 has_started = true;
181 while (!shutting_down) {
182 while (!paused) {
183 paused_set = false;
184 const auto next_time = Advance();
185 if (next_time) {
186 std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time);
187 event.WaitFor(next_time_ns);
188 } else {
189 wait_set = true;
190 event.Wait();
191 }
192 wait_set = false;
193 }
194 paused_set = true;
195 }
196}
197
198std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const {
199 return clock->GetTimeNS();
200}
201
202std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const {
203 return clock->GetTimeUS();
204}
205
206} // namespace Core::HostTiming
diff --git a/src/core/host_timing.h b/src/core/host_timing.h
new file mode 100644
index 000000000..be6b68d7c
--- /dev/null
+++ b/src/core/host_timing.h
@@ -0,0 +1,160 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <atomic>
8#include <chrono>
9#include <functional>
10#include <memory>
11#include <mutex>
12#include <optional>
13#include <string>
14#include <thread>
15#include <vector>
16
17#include "common/common_types.h"
18#include "common/spin_lock.h"
19#include "common/thread.h"
20#include "common/threadsafe_queue.h"
21#include "common/wall_clock.h"
22#include "core/hardware_properties.h"
23
24namespace Core::HostTiming {
25
26/// A callback that may be scheduled for a particular core timing event.
27using TimedCallback = std::function<void(u64 userdata, s64 cycles_late)>;
28
29/// Contains the characteristics of a particular event.
30struct EventType {
31 EventType(TimedCallback&& callback, std::string&& name)
32 : callback{std::move(callback)}, name{std::move(name)} {}
33
34 /// The event's callback function.
35 TimedCallback callback;
36 /// A pointer to the name of the event.
37 const std::string name;
38};
39
40/**
41 * This is a system to schedule events into the emulated machine's future. Time is measured
42 * in main CPU clock cycles.
43 *
44 * To schedule an event, you first have to register its type. This is where you pass in the
45 * callback. You then schedule events using the type id you get back.
46 *
47 * The int cyclesLate that the callbacks get is how many cycles late it was.
48 * So to schedule a new event on a regular basis:
49 * inside callback:
50 * ScheduleEvent(periodInCycles - cyclesLate, callback, "whatever")
51 */
52class CoreTiming {
53public:
54 CoreTiming();
55 ~CoreTiming();
56
57 CoreTiming(const CoreTiming&) = delete;
58 CoreTiming(CoreTiming&&) = delete;
59
60 CoreTiming& operator=(const CoreTiming&) = delete;
61 CoreTiming& operator=(CoreTiming&&) = delete;
62
63 /// CoreTiming begins at the boundary of timing slice -1. An initial call to Advance() is
64 /// required to end slice - 1 and start slice 0 before the first cycle of code is executed.
65 void Initialize();
66
67 /// Tears down all timing related functionality.
68 void Shutdown();
69
70 /// Pauses/Unpauses the execution of the timer thread.
71 void Pause(bool is_paused);
72
73 /// Pauses/Unpauses the execution of the timer thread and waits until paused.
74 void SyncPause(bool is_paused);
75
76 /// Checks if core timing is running.
77 bool IsRunning() const;
78
79 /// Checks if the timer thread has started.
80 bool HasStarted() const {
81 return has_started;
82 }
83
84 /// Checks if there are any pending time events.
85 bool HasPendingEvents() const;
86
87 /// Schedules an event in core timing
88 void ScheduleEvent(s64 ns_into_future, const std::shared_ptr<EventType>& event_type,
89 u64 userdata = 0);
90
91 void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, u64 userdata);
92
93 /// We only permit one event of each type in the queue at a time.
94 void RemoveEvent(const std::shared_ptr<EventType>& event_type);
95
96 void AddTicks(std::size_t core_index, u64 ticks);
97
98 void ResetTicks(std::size_t core_index);
99
100 /// Returns current time in emulated CPU cycles
101 u64 GetCPUTicks() const;
102
103 /// Returns current time in emulated in Clock cycles
104 u64 GetClockTicks() const;
105
106 /// Returns current time in microseconds.
107 std::chrono::microseconds GetGlobalTimeUs() const;
108
109 /// Returns current time in nanoseconds.
110 std::chrono::nanoseconds GetGlobalTimeNs() const;
111
112 /// Checks for events manually and returns time in nanoseconds for next event, threadsafe.
113 std::optional<u64> Advance();
114
115private:
116 struct Event;
117
118 /// Clear all pending events. This should ONLY be done on exit.
119 void ClearPendingEvents();
120
121 static void ThreadEntry(CoreTiming& instance);
122 void ThreadLoop();
123
124 std::unique_ptr<Common::WallClock> clock;
125
126 u64 global_timer = 0;
127
128 std::chrono::nanoseconds start_point;
129
130 // The queue is a min-heap using std::make_heap/push_heap/pop_heap.
131 // We don't use std::priority_queue because we need to be able to serialize, unserialize and
132 // erase arbitrary events (RemoveEvent()) regardless of the queue order. These aren't
133 // accomodated by the standard adaptor class.
134 std::vector<Event> event_queue;
135 u64 event_fifo_id = 0;
136
137 std::shared_ptr<EventType> ev_lost;
138 Common::Event event{};
139 Common::SpinLock basic_lock{};
140 Common::SpinLock advance_lock{};
141 std::unique_ptr<std::thread> timer_thread;
142 std::atomic<bool> paused{};
143 std::atomic<bool> paused_set{};
144 std::atomic<bool> wait_set{};
145 std::atomic<bool> shutting_down{};
146 std::atomic<bool> has_started{};
147
148 std::array<std::atomic<u64>, Core::Hardware::NUM_CPU_CORES> ticks_count{};
149};
150
151/// Creates a core timing event with the given name and callback.
152///
153/// @param name The name of the core timing event to create.
154/// @param callback The callback to execute for the event.
155///
156/// @returns An EventType instance representing the created event.
157///
158std::shared_ptr<EventType> CreateEvent(std::string name, TimedCallback&& callback);
159
160} // namespace Core::HostTiming
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 4edff9cd8..56df5e925 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -127,6 +127,13 @@ void LogSettings() {
127 LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local); 127 LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local);
128} 128}
129 129
130float Volume() {
131 if (values.audio_muted) {
132 return 0.0f;
133 }
134 return values.volume;
135}
136
130bool IsGPULevelExtreme() { 137bool IsGPULevelExtreme() {
131 return values.gpu_accuracy == GPUAccuracy::Extreme; 138 return values.gpu_accuracy == GPUAccuracy::Extreme;
132} 139}
diff --git a/src/core/settings.h b/src/core/settings.h
index 78eb33737..a598ccbc1 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -437,7 +437,7 @@ struct Values {
437 bool renderer_debug; 437 bool renderer_debug;
438 int vulkan_device; 438 int vulkan_device;
439 439
440 float resolution_factor; 440 u16 resolution_factor{1};
441 int aspect_ratio; 441 int aspect_ratio;
442 int max_anisotropy; 442 int max_anisotropy;
443 bool use_frame_limit; 443 bool use_frame_limit;
@@ -459,6 +459,7 @@ struct Values {
459 bool use_dev_keys; 459 bool use_dev_keys;
460 460
461 // Audio 461 // Audio
462 bool audio_muted;
462 std::string sink_id; 463 std::string sink_id;
463 bool enable_audio_stretching; 464 bool enable_audio_stretching;
464 std::string audio_device_id; 465 std::string audio_device_id;
@@ -474,6 +475,7 @@ struct Values {
474 bool reporting_services; 475 bool reporting_services;
475 bool quest_flag; 476 bool quest_flag;
476 bool disable_cpu_opt; 477 bool disable_cpu_opt;
478 bool disable_macro_jit;
477 479
478 // BCAT 480 // BCAT
479 std::string bcat_backend; 481 std::string bcat_backend;
@@ -489,6 +491,8 @@ struct Values {
489 std::map<u64, std::vector<std::string>> disabled_addons; 491 std::map<u64, std::vector<std::string>> disabled_addons;
490} extern values; 492} extern values;
491 493
494float Volume();
495
492bool IsGPULevelExtreme(); 496bool IsGPULevelExtreme();
493bool IsGPULevelHigh(); 497bool IsGPULevelHigh();
494 498
diff --git a/src/input_common/keyboard.cpp b/src/input_common/keyboard.cpp
index 078374be5..afb8e6612 100644
--- a/src/input_common/keyboard.cpp
+++ b/src/input_common/keyboard.cpp
@@ -76,7 +76,7 @@ std::unique_ptr<Input::ButtonDevice> Keyboard::Create(const Common::ParamPackage
76 int key_code = params.Get("code", 0); 76 int key_code = params.Get("code", 0);
77 std::unique_ptr<KeyButton> button = std::make_unique<KeyButton>(key_button_list); 77 std::unique_ptr<KeyButton> button = std::make_unique<KeyButton>(key_button_list);
78 key_button_list->AddKeyButton(key_code, button.get()); 78 key_button_list->AddKeyButton(key_code, button.get());
79 return std::move(button); 79 return button;
80} 80}
81 81
82void Keyboard::PressKey(int key_code) { 82void Keyboard::PressKey(int key_code) {
diff --git a/src/input_common/motion_emu.cpp b/src/input_common/motion_emu.cpp
index 868251628..d4cdf76a3 100644
--- a/src/input_common/motion_emu.cpp
+++ b/src/input_common/motion_emu.cpp
@@ -145,7 +145,7 @@ std::unique_ptr<Input::MotionDevice> MotionEmu::Create(const Common::ParamPackag
145 // Previously created device is disconnected here. Having two motion devices for 3DS is not 145 // Previously created device is disconnected here. Having two motion devices for 3DS is not
146 // expected. 146 // expected.
147 current_device = device_wrapper->device; 147 current_device = device_wrapper->device;
148 return std::move(device_wrapper); 148 return device_wrapper;
149} 149}
150 150
151void MotionEmu::BeginTilt(int x, int y) { 151void MotionEmu::BeginTilt(int x, int y) {
diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt
index c7038b217..3f750b51c 100644
--- a/src/tests/CMakeLists.txt
+++ b/src/tests/CMakeLists.txt
@@ -1,12 +1,14 @@
1add_executable(tests 1add_executable(tests
2 common/bit_field.cpp 2 common/bit_field.cpp
3 common/bit_utils.cpp 3 common/bit_utils.cpp
4 common/fibers.cpp
4 common/multi_level_queue.cpp 5 common/multi_level_queue.cpp
5 common/param_package.cpp 6 common/param_package.cpp
6 common/ring_buffer.cpp 7 common/ring_buffer.cpp
7 core/arm/arm_test_common.cpp 8 core/arm/arm_test_common.cpp
8 core/arm/arm_test_common.h 9 core/arm/arm_test_common.h
9 core/core_timing.cpp 10 core/core_timing.cpp
11 core/host_timing.cpp
10 tests.cpp 12 tests.cpp
11) 13)
12 14
diff --git a/src/tests/common/fibers.cpp b/src/tests/common/fibers.cpp
new file mode 100644
index 000000000..12536b6d8
--- /dev/null
+++ b/src/tests/common/fibers.cpp
@@ -0,0 +1,358 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <atomic>
6#include <cstdlib>
7#include <functional>
8#include <memory>
9#include <thread>
10#include <unordered_map>
11#include <vector>
12
13#include <catch2/catch.hpp>
14#include <math.h>
15#include "common/common_types.h"
16#include "common/fiber.h"
17#include "common/spin_lock.h"
18
19namespace Common {
20
21class TestControl1 {
22public:
23 TestControl1() = default;
24
25 void DoWork();
26
27 void ExecuteThread(u32 id);
28
29 std::unordered_map<std::thread::id, u32> ids;
30 std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
31 std::vector<std::shared_ptr<Common::Fiber>> work_fibers;
32 std::vector<u32> items;
33 std::vector<u32> results;
34};
35
36static void WorkControl1(void* control) {
37 auto* test_control = static_cast<TestControl1*>(control);
38 test_control->DoWork();
39}
40
41void TestControl1::DoWork() {
42 std::thread::id this_id = std::this_thread::get_id();
43 u32 id = ids[this_id];
44 u32 value = items[id];
45 for (u32 i = 0; i < id; i++) {
46 value++;
47 }
48 results[id] = value;
49 Fiber::YieldTo(work_fibers[id], thread_fibers[id]);
50}
51
52void TestControl1::ExecuteThread(u32 id) {
53 std::thread::id this_id = std::this_thread::get_id();
54 ids[this_id] = id;
55 auto thread_fiber = Fiber::ThreadToFiber();
56 thread_fibers[id] = thread_fiber;
57 work_fibers[id] = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl1}, this);
58 items[id] = rand() % 256;
59 Fiber::YieldTo(thread_fibers[id], work_fibers[id]);
60 thread_fibers[id]->Exit();
61}
62
63static void ThreadStart1(u32 id, TestControl1& test_control) {
64 test_control.ExecuteThread(id);
65}
66
67/** This test checks the fiber setup configuration and validates that the fibers
68 * do all of the work required.
69 */
70TEST_CASE("Fibers::Setup", "[common]") {
71 constexpr u32 num_threads = 7;
72 TestControl1 test_control{};
73 test_control.thread_fibers.resize(num_threads);
74 test_control.work_fibers.resize(num_threads);
75 test_control.items.resize(num_threads, 0);
76 test_control.results.resize(num_threads, 0);
77 std::vector<std::thread> threads;
78 for (u32 i = 0; i < num_threads; i++) {
79 threads.emplace_back(ThreadStart1, i, std::ref(test_control));
80 }
81 for (u32 i = 0; i < num_threads; i++) {
82 threads[i].join();
83 }
84 for (u32 i = 0; i < num_threads; i++) {
85 REQUIRE(test_control.items[i] + i == test_control.results[i]);
86 }
87}
88
89class TestControl2 {
90public:
91 TestControl2() = default;
92
93 void DoWork1() {
94 trap2 = false;
95 while (trap.load())
96 ;
97 for (u32 i = 0; i < 12000; i++) {
98 value1 += i;
99 }
100 Fiber::YieldTo(fiber1, fiber3);
101 std::thread::id this_id = std::this_thread::get_id();
102 u32 id = ids[this_id];
103 assert1 = id == 1;
104 value2 += 5000;
105 Fiber::YieldTo(fiber1, thread_fibers[id]);
106 }
107
108 void DoWork2() {
109 while (trap2.load())
110 ;
111 value2 = 2000;
112 trap = false;
113 Fiber::YieldTo(fiber2, fiber1);
114 assert3 = false;
115 }
116
117 void DoWork3() {
118 std::thread::id this_id = std::this_thread::get_id();
119 u32 id = ids[this_id];
120 assert2 = id == 0;
121 value1 += 1000;
122 Fiber::YieldTo(fiber3, thread_fibers[id]);
123 }
124
125 void ExecuteThread(u32 id);
126
127 void CallFiber1() {
128 std::thread::id this_id = std::this_thread::get_id();
129 u32 id = ids[this_id];
130 Fiber::YieldTo(thread_fibers[id], fiber1);
131 }
132
133 void CallFiber2() {
134 std::thread::id this_id = std::this_thread::get_id();
135 u32 id = ids[this_id];
136 Fiber::YieldTo(thread_fibers[id], fiber2);
137 }
138
139 void Exit();
140
141 bool assert1{};
142 bool assert2{};
143 bool assert3{true};
144 u32 value1{};
145 u32 value2{};
146 std::atomic<bool> trap{true};
147 std::atomic<bool> trap2{true};
148 std::unordered_map<std::thread::id, u32> ids;
149 std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
150 std::shared_ptr<Common::Fiber> fiber1;
151 std::shared_ptr<Common::Fiber> fiber2;
152 std::shared_ptr<Common::Fiber> fiber3;
153};
154
155static void WorkControl2_1(void* control) {
156 auto* test_control = static_cast<TestControl2*>(control);
157 test_control->DoWork1();
158}
159
160static void WorkControl2_2(void* control) {
161 auto* test_control = static_cast<TestControl2*>(control);
162 test_control->DoWork2();
163}
164
165static void WorkControl2_3(void* control) {
166 auto* test_control = static_cast<TestControl2*>(control);
167 test_control->DoWork3();
168}
169
170void TestControl2::ExecuteThread(u32 id) {
171 std::thread::id this_id = std::this_thread::get_id();
172 ids[this_id] = id;
173 auto thread_fiber = Fiber::ThreadToFiber();
174 thread_fibers[id] = thread_fiber;
175}
176
177void TestControl2::Exit() {
178 std::thread::id this_id = std::this_thread::get_id();
179 u32 id = ids[this_id];
180 thread_fibers[id]->Exit();
181}
182
183static void ThreadStart2_1(u32 id, TestControl2& test_control) {
184 test_control.ExecuteThread(id);
185 test_control.CallFiber1();
186 test_control.Exit();
187}
188
189static void ThreadStart2_2(u32 id, TestControl2& test_control) {
190 test_control.ExecuteThread(id);
191 test_control.CallFiber2();
192 test_control.Exit();
193}
194
195/** This test checks the fiber thread-exchange configuration and validates
196 * that a fiber has been successfully transferred from one thread to another and that the TLS
197 * region of the thread is kept while changing fibers.
198 */
199TEST_CASE("Fibers::InterExchange", "[common]") {
200 TestControl2 test_control{};
201 test_control.thread_fibers.resize(2);
202 test_control.fiber1 =
203 std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_1}, &test_control);
204 test_control.fiber2 =
205 std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_2}, &test_control);
206 test_control.fiber3 =
207 std::make_shared<Fiber>(std::function<void(void*)>{WorkControl2_3}, &test_control);
208 std::thread thread1(ThreadStart2_1, 0, std::ref(test_control));
209 std::thread thread2(ThreadStart2_2, 1, std::ref(test_control));
210 thread1.join();
211 thread2.join();
212 REQUIRE(test_control.assert1);
213 REQUIRE(test_control.assert2);
214 REQUIRE(test_control.assert3);
215 REQUIRE(test_control.value2 == 7000);
216 u32 cal_value = 0;
217 for (u32 i = 0; i < 12000; i++) {
218 cal_value += i;
219 }
220 cal_value += 1000;
221 REQUIRE(test_control.value1 == cal_value);
222}
223
224class TestControl3 {
225public:
226 TestControl3() = default;
227
228 void DoWork1() {
229 value1 += 1;
230 Fiber::YieldTo(fiber1, fiber2);
231 std::thread::id this_id = std::this_thread::get_id();
232 u32 id = ids[this_id];
233 value3 += 1;
234 Fiber::YieldTo(fiber1, thread_fibers[id]);
235 }
236
237 void DoWork2() {
238 value2 += 1;
239 std::thread::id this_id = std::this_thread::get_id();
240 u32 id = ids[this_id];
241 Fiber::YieldTo(fiber2, thread_fibers[id]);
242 }
243
244 void ExecuteThread(u32 id);
245
246 void CallFiber1() {
247 std::thread::id this_id = std::this_thread::get_id();
248 u32 id = ids[this_id];
249 Fiber::YieldTo(thread_fibers[id], fiber1);
250 }
251
252 void Exit();
253
254 u32 value1{};
255 u32 value2{};
256 u32 value3{};
257 std::unordered_map<std::thread::id, u32> ids;
258 std::vector<std::shared_ptr<Common::Fiber>> thread_fibers;
259 std::shared_ptr<Common::Fiber> fiber1;
260 std::shared_ptr<Common::Fiber> fiber2;
261};
262
263static void WorkControl3_1(void* control) {
264 auto* test_control = static_cast<TestControl3*>(control);
265 test_control->DoWork1();
266}
267
268static void WorkControl3_2(void* control) {
269 auto* test_control = static_cast<TestControl3*>(control);
270 test_control->DoWork2();
271}
272
273void TestControl3::ExecuteThread(u32 id) {
274 std::thread::id this_id = std::this_thread::get_id();
275 ids[this_id] = id;
276 auto thread_fiber = Fiber::ThreadToFiber();
277 thread_fibers[id] = thread_fiber;
278}
279
280void TestControl3::Exit() {
281 std::thread::id this_id = std::this_thread::get_id();
282 u32 id = ids[this_id];
283 thread_fibers[id]->Exit();
284}
285
286static void ThreadStart3(u32 id, TestControl3& test_control) {
287 test_control.ExecuteThread(id);
288 test_control.CallFiber1();
289 test_control.Exit();
290}
291
292/** This test checks two threads racing to start the same fiber.
293 * It checks that execution occurred in an ordered manner and that at no time
294 * were two contexts active at the same time.
295 */
296TEST_CASE("Fibers::StartRace", "[common]") {
297 TestControl3 test_control{};
298 test_control.thread_fibers.resize(2);
299 test_control.fiber1 =
300 std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_1}, &test_control);
301 test_control.fiber2 =
302 std::make_shared<Fiber>(std::function<void(void*)>{WorkControl3_2}, &test_control);
303 std::thread thread1(ThreadStart3, 0, std::ref(test_control));
304 std::thread thread2(ThreadStart3, 1, std::ref(test_control));
305 thread1.join();
306 thread2.join();
307 REQUIRE(test_control.value1 == 1);
308 REQUIRE(test_control.value2 == 1);
309 REQUIRE(test_control.value3 == 1);
310}
311
312class TestControl4;
313
314static void WorkControl4(void* control);
315
316class TestControl4 {
317public:
318 TestControl4() {
319 fiber1 = std::make_shared<Fiber>(std::function<void(void*)>{WorkControl4}, this);
320 goal_reached = false;
321 rewinded = false;
322 }
323
324 void Execute() {
325 thread_fiber = Fiber::ThreadToFiber();
326 Fiber::YieldTo(thread_fiber, fiber1);
327 thread_fiber->Exit();
328 }
329
330 void DoWork() {
331 fiber1->SetRewindPoint(std::function<void(void*)>{WorkControl4}, this);
332 if (rewinded) {
333 goal_reached = true;
334 Fiber::YieldTo(fiber1, thread_fiber);
335 }
336 rewinded = true;
337 fiber1->Rewind();
338 }
339
340 std::shared_ptr<Common::Fiber> fiber1;
341 std::shared_ptr<Common::Fiber> thread_fiber;
342 bool goal_reached;
343 bool rewinded;
344};
345
346static void WorkControl4(void* control) {
347 auto* test_control = static_cast<TestControl4*>(control);
348 test_control->DoWork();
349}
350
351TEST_CASE("Fibers::Rewind", "[common]") {
352 TestControl4 test_control{};
353 test_control.Execute();
354 REQUIRE(test_control.goal_reached);
355 REQUIRE(test_control.rewinded);
356}
357
358} // namespace Common
diff --git a/src/tests/core/host_timing.cpp b/src/tests/core/host_timing.cpp
new file mode 100644
index 000000000..556254098
--- /dev/null
+++ b/src/tests/core/host_timing.cpp
@@ -0,0 +1,142 @@
1// Copyright 2016 Dolphin Emulator Project / 2017 Dolphin Emulator Project
2// Licensed under GPLv2+
3// Refer to the license.txt file included.
4
5#include <catch2/catch.hpp>
6
7#include <array>
8#include <bitset>
9#include <cstdlib>
10#include <memory>
11#include <string>
12
13#include "common/file_util.h"
14#include "core/core.h"
15#include "core/host_timing.h"
16
17// Numbers are chosen randomly to make sure the correct one is given.
18static constexpr std::array<u64, 5> CB_IDS{{42, 144, 93, 1026, UINT64_C(0xFFFF7FFFF7FFFF)}};
19static constexpr int MAX_SLICE_LENGTH = 10000; // Copied from CoreTiming internals
20static constexpr std::array<u64, 5> calls_order{{2, 0, 1, 4, 3}};
21static std::array<s64, 5> delays{};
22
23static std::bitset<CB_IDS.size()> callbacks_ran_flags;
24static u64 expected_callback = 0;
25
26template <unsigned int IDX>
27void HostCallbackTemplate(u64 userdata, s64 nanoseconds_late) {
28 static_assert(IDX < CB_IDS.size(), "IDX out of range");
29 callbacks_ran_flags.set(IDX);
30 REQUIRE(CB_IDS[IDX] == userdata);
31 REQUIRE(CB_IDS[IDX] == CB_IDS[calls_order[expected_callback]]);
32 delays[IDX] = nanoseconds_late;
33 ++expected_callback;
34}
35
36struct ScopeInit final {
37 ScopeInit() {
38 core_timing.Initialize();
39 }
40 ~ScopeInit() {
41 core_timing.Shutdown();
42 }
43
44 Core::HostTiming::CoreTiming core_timing;
45};
46
47#pragma optimize("", off)
48
49static u64 TestTimerSpeed(Core::HostTiming::CoreTiming& core_timing) {
50 u64 start = core_timing.GetGlobalTimeNs().count();
51 u64 placebo = 0;
52 for (std::size_t i = 0; i < 1000; i++) {
53 placebo += core_timing.GetGlobalTimeNs().count();
54 }
55 u64 end = core_timing.GetGlobalTimeNs().count();
56 return (end - start);
57}
58
59#pragma optimize("", on)
60
61TEST_CASE("HostTiming[BasicOrder]", "[core]") {
62 ScopeInit guard;
63 auto& core_timing = guard.core_timing;
64 std::vector<std::shared_ptr<Core::HostTiming::EventType>> events{
65 Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>),
66 Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>),
67 Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>),
68 Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>),
69 Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>),
70 };
71
72 expected_callback = 0;
73
74 core_timing.SyncPause(true);
75
76 u64 one_micro = 1000U;
77 for (std::size_t i = 0; i < events.size(); i++) {
78 u64 order = calls_order[i];
79 core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]);
80 }
81 /// test pause
82 REQUIRE(callbacks_ran_flags.none());
83
84 core_timing.Pause(false); // No need to sync
85
86 while (core_timing.HasPendingEvents())
87 ;
88
89 REQUIRE(callbacks_ran_flags.all());
90
91 for (std::size_t i = 0; i < delays.size(); i++) {
92 const double delay = static_cast<double>(delays[i]);
93 const double micro = delay / 1000.0f;
94 const double mili = micro / 1000.0f;
95 printf("HostTimer Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili);
96 }
97}
98
99TEST_CASE("HostTiming[BasicOrderNoPausing]", "[core]") {
100 ScopeInit guard;
101 auto& core_timing = guard.core_timing;
102 std::vector<std::shared_ptr<Core::HostTiming::EventType>> events{
103 Core::HostTiming::CreateEvent("callbackA", HostCallbackTemplate<0>),
104 Core::HostTiming::CreateEvent("callbackB", HostCallbackTemplate<1>),
105 Core::HostTiming::CreateEvent("callbackC", HostCallbackTemplate<2>),
106 Core::HostTiming::CreateEvent("callbackD", HostCallbackTemplate<3>),
107 Core::HostTiming::CreateEvent("callbackE", HostCallbackTemplate<4>),
108 };
109
110 core_timing.SyncPause(true);
111 core_timing.SyncPause(false);
112
113 expected_callback = 0;
114
115 u64 start = core_timing.GetGlobalTimeNs().count();
116 u64 one_micro = 1000U;
117 for (std::size_t i = 0; i < events.size(); i++) {
118 u64 order = calls_order[i];
119 core_timing.ScheduleEvent(i * one_micro + 100U, events[order], CB_IDS[order]);
120 }
121 u64 end = core_timing.GetGlobalTimeNs().count();
122 const double scheduling_time = static_cast<double>(end - start);
123 const double timer_time = static_cast<double>(TestTimerSpeed(core_timing));
124
125 while (core_timing.HasPendingEvents())
126 ;
127
128 REQUIRE(callbacks_ran_flags.all());
129
130 for (std::size_t i = 0; i < delays.size(); i++) {
131 const double delay = static_cast<double>(delays[i]);
132 const double micro = delay / 1000.0f;
133 const double mili = micro / 1000.0f;
134 printf("HostTimer No Pausing Delay[%zu]: %.3f %.6f\n", i, micro, mili);
135 }
136
137 const double micro = scheduling_time / 1000.0f;
138 const double mili = micro / 1000.0f;
139 printf("HostTimer No Pausing Scheduling Time: %.3f %.6f\n", micro, mili);
140 printf("HostTimer No Pausing Timer Time: %.3f %.6f\n", timer_time / 1000.f,
141 timer_time / 1000000.f);
142}
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index d6ee82836..2dc752aa9 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -25,6 +25,14 @@ add_library(video_core STATIC
25 engines/shader_bytecode.h 25 engines/shader_bytecode.h
26 engines/shader_header.h 26 engines/shader_header.h
27 engines/shader_type.h 27 engines/shader_type.h
28 macro/macro.cpp
29 macro/macro.h
30 macro/macro_hle.cpp
31 macro/macro_hle.h
32 macro/macro_interpreter.cpp
33 macro/macro_interpreter.h
34 macro/macro_jit_x64.cpp
35 macro/macro_jit_x64.h
28 fence_manager.h 36 fence_manager.h
29 gpu.cpp 37 gpu.cpp
30 gpu.h 38 gpu.h
@@ -36,8 +44,6 @@ add_library(video_core STATIC
36 gpu_thread.h 44 gpu_thread.h
37 guest_driver.cpp 45 guest_driver.cpp
38 guest_driver.h 46 guest_driver.h
39 macro_interpreter.cpp
40 macro_interpreter.h
41 memory_manager.cpp 47 memory_manager.cpp
42 memory_manager.h 48 memory_manager.h
43 morton.cpp 49 morton.cpp
@@ -45,11 +51,11 @@ add_library(video_core STATIC
45 query_cache.h 51 query_cache.h
46 rasterizer_accelerated.cpp 52 rasterizer_accelerated.cpp
47 rasterizer_accelerated.h 53 rasterizer_accelerated.h
48 rasterizer_cache.cpp
49 rasterizer_cache.h
50 rasterizer_interface.h 54 rasterizer_interface.h
51 renderer_base.cpp 55 renderer_base.cpp
52 renderer_base.h 56 renderer_base.h
57 renderer_opengl/gl_arb_decompiler.cpp
58 renderer_opengl/gl_arb_decompiler.h
53 renderer_opengl/gl_buffer_cache.cpp 59 renderer_opengl/gl_buffer_cache.cpp
54 renderer_opengl/gl_buffer_cache.h 60 renderer_opengl/gl_buffer_cache.h
55 renderer_opengl/gl_device.cpp 61 renderer_opengl/gl_device.cpp
@@ -89,6 +95,7 @@ add_library(video_core STATIC
89 renderer_opengl/utils.h 95 renderer_opengl/utils.h
90 sampler_cache.cpp 96 sampler_cache.cpp
91 sampler_cache.h 97 sampler_cache.h
98 shader_cache.h
92 shader/decode/arithmetic.cpp 99 shader/decode/arithmetic.cpp
93 shader/decode/arithmetic_immediate.cpp 100 shader/decode/arithmetic_immediate.cpp
94 shader/decode/bfe.cpp 101 shader/decode/bfe.cpp
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
index e35ee0b67..e64170e66 100644
--- a/src/video_core/buffer_cache/buffer_block.h
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -15,48 +15,47 @@ namespace VideoCommon {
15 15
16class BufferBlock { 16class BufferBlock {
17public: 17public:
18 bool Overlaps(const VAddr start, const VAddr end) const { 18 bool Overlaps(VAddr start, VAddr end) const {
19 return (cpu_addr < end) && (cpu_addr_end > start); 19 return (cpu_addr < end) && (cpu_addr_end > start);
20 } 20 }
21 21
22 bool IsInside(const VAddr other_start, const VAddr other_end) const { 22 bool IsInside(VAddr other_start, VAddr other_end) const {
23 return cpu_addr <= other_start && other_end <= cpu_addr_end; 23 return cpu_addr <= other_start && other_end <= cpu_addr_end;
24 } 24 }
25 25
26 std::size_t GetOffset(const VAddr in_addr) { 26 std::size_t Offset(VAddr in_addr) const {
27 return static_cast<std::size_t>(in_addr - cpu_addr); 27 return static_cast<std::size_t>(in_addr - cpu_addr);
28 } 28 }
29 29
30 VAddr GetCpuAddr() const { 30 VAddr CpuAddr() const {
31 return cpu_addr; 31 return cpu_addr;
32 } 32 }
33 33
34 VAddr GetCpuAddrEnd() const { 34 VAddr CpuAddrEnd() const {
35 return cpu_addr_end; 35 return cpu_addr_end;
36 } 36 }
37 37
38 void SetCpuAddr(const VAddr new_addr) { 38 void SetCpuAddr(VAddr new_addr) {
39 cpu_addr = new_addr; 39 cpu_addr = new_addr;
40 cpu_addr_end = new_addr + size; 40 cpu_addr_end = new_addr + size;
41 } 41 }
42 42
43 std::size_t GetSize() const { 43 std::size_t Size() const {
44 return size; 44 return size;
45 } 45 }
46 46
47 void SetEpoch(u64 new_epoch) { 47 u64 Epoch() const {
48 epoch = new_epoch; 48 return epoch;
49 } 49 }
50 50
51 u64 GetEpoch() { 51 void SetEpoch(u64 new_epoch) {
52 return epoch; 52 epoch = new_epoch;
53 } 53 }
54 54
55protected: 55protected:
56 explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} { 56 explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
57 SetCpuAddr(cpu_addr); 57 SetCpuAddr(cpu_addr_);
58 } 58 }
59 ~BufferBlock() = default;
60 59
61private: 60private:
62 VAddr cpu_addr{}; 61 VAddr cpu_addr{};
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index d9a4a1b4d..cf8bdd021 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -30,23 +30,31 @@
30 30
31namespace VideoCommon { 31namespace VideoCommon {
32 32
33template <typename OwnerBuffer, typename BufferType, typename StreamBuffer> 33template <typename Buffer, typename BufferType, typename StreamBuffer>
34class BufferCache { 34class BufferCache {
35 using IntervalSet = boost::icl::interval_set<VAddr>; 35 using IntervalSet = boost::icl::interval_set<VAddr>;
36 using IntervalType = typename IntervalSet::interval_type; 36 using IntervalType = typename IntervalSet::interval_type;
37 using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>; 37 using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;
38 38
39 static constexpr u64 WRITE_PAGE_BIT = 11;
40 static constexpr u64 BLOCK_PAGE_BITS = 21;
41 static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS;
42
39public: 43public:
40 using BufferInfo = std::pair<BufferType, u64>; 44 struct BufferInfo {
45 BufferType handle;
46 u64 offset;
47 u64 address;
48 };
41 49
42 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, 50 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
43 bool is_written = false, bool use_fast_cbuf = false) { 51 bool is_written = false, bool use_fast_cbuf = false) {
44 std::lock_guard lock{mutex}; 52 std::lock_guard lock{mutex};
45 53
46 const auto& memory_manager = system.GPU().MemoryManager(); 54 auto& memory_manager = system.GPU().MemoryManager();
47 const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); 55 const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
48 if (!cpu_addr_opt) { 56 if (!cpu_addr_opt) {
49 return {GetEmptyBuffer(size), 0}; 57 return GetEmptyBuffer(size);
50 } 58 }
51 const VAddr cpu_addr = *cpu_addr_opt; 59 const VAddr cpu_addr = *cpu_addr_opt;
52 60
@@ -55,33 +63,36 @@ public:
55 constexpr std::size_t max_stream_size = 0x800; 63 constexpr std::size_t max_stream_size = 0x800;
56 if (use_fast_cbuf || size < max_stream_size) { 64 if (use_fast_cbuf || size < max_stream_size) {
57 if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { 65 if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
58 auto& memory_manager = system.GPU().MemoryManager(); 66 const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size);
59 if (use_fast_cbuf) { 67 if (use_fast_cbuf) {
60 if (memory_manager.IsGranularRange(gpu_addr, size)) { 68 u8* dest;
61 const auto host_ptr = memory_manager.GetPointer(gpu_addr); 69 if (is_granular) {
62 return ConstBufferUpload(host_ptr, size); 70 dest = memory_manager.GetPointer(gpu_addr);
63 } else { 71 } else {
64 staging_buffer.resize(size); 72 staging_buffer.resize(size);
65 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); 73 dest = staging_buffer.data();
66 return ConstBufferUpload(staging_buffer.data(), size); 74 memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
67 } 75 }
76 return ConstBufferUpload(dest, size);
77 }
78 if (is_granular) {
79 u8* const host_ptr = memory_manager.GetPointer(gpu_addr);
80 return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
81 std::memcpy(dest, host_ptr, size);
82 });
68 } else { 83 } else {
69 if (memory_manager.IsGranularRange(gpu_addr, size)) { 84 return StreamBufferUpload(
70 const auto host_ptr = memory_manager.GetPointer(gpu_addr); 85 size, alignment, [&memory_manager, gpu_addr, size](u8* dest) {
71 return StreamBufferUpload(host_ptr, size, alignment); 86 memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
72 } else { 87 });
73 staging_buffer.resize(size);
74 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
75 return StreamBufferUpload(staging_buffer.data(), size, alignment);
76 }
77 } 88 }
78 } 89 }
79 } 90 }
80 91
81 OwnerBuffer block = GetBlock(cpu_addr, size); 92 Buffer* const block = GetBlock(cpu_addr, size);
82 MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); 93 MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
83 if (!map) { 94 if (!map) {
84 return {GetEmptyBuffer(size), 0}; 95 return GetEmptyBuffer(size);
85 } 96 }
86 if (is_written) { 97 if (is_written) {
87 map->MarkAsModified(true, GetModifiedTicks()); 98 map->MarkAsModified(true, GetModifiedTicks());
@@ -94,41 +105,49 @@ public:
94 } 105 }
95 } 106 }
96 107
97 return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))}; 108 return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()};
98 } 109 }
99 110
100 /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. 111 /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
101 BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, 112 BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
102 std::size_t alignment = 4) { 113 std::size_t alignment = 4) {
103 std::lock_guard lock{mutex}; 114 std::lock_guard lock{mutex};
104 return StreamBufferUpload(raw_pointer, size, alignment); 115 return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) {
116 std::memcpy(dest, raw_pointer, size);
117 });
105 } 118 }
106 119
107 void Map(std::size_t max_size) { 120 /// Prepares the buffer cache for data uploading
121 /// @param max_size Maximum number of bytes that will be uploaded
122 /// @return True when a stream buffer invalidation was required, false otherwise
123 bool Map(std::size_t max_size) {
108 std::lock_guard lock{mutex}; 124 std::lock_guard lock{mutex};
109 125
126 bool invalidated;
110 std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); 127 std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
111 buffer_offset = buffer_offset_base; 128 buffer_offset = buffer_offset_base;
129
130 return invalidated;
112 } 131 }
113 132
114 /// Finishes the upload stream, returns true on bindings invalidation. 133 /// Finishes the upload stream
115 bool Unmap() { 134 void Unmap() {
116 std::lock_guard lock{mutex}; 135 std::lock_guard lock{mutex};
117
118 stream_buffer->Unmap(buffer_offset - buffer_offset_base); 136 stream_buffer->Unmap(buffer_offset - buffer_offset_base);
119 return std::exchange(invalidated, false);
120 } 137 }
121 138
 139 /// Function called at the end of each frame, intended for deferred operations
122 void TickFrame() { 140 void TickFrame() {
123 ++epoch; 141 ++epoch;
142
124 while (!pending_destruction.empty()) { 143 while (!pending_destruction.empty()) {
125 // Delay at least 4 frames before destruction. 144 // Delay at least 4 frames before destruction.
126 // This is due to triple buffering happening on some drivers. 145 // This is due to triple buffering happening on some drivers.
127 static constexpr u64 epochs_to_destroy = 5; 146 static constexpr u64 epochs_to_destroy = 5;
128 if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) { 147 if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) {
129 break; 148 break;
130 } 149 }
131 pending_destruction.pop_front(); 150 pending_destruction.pop();
132 } 151 }
133 } 152 }
134 153
@@ -239,28 +258,16 @@ public:
239 committed_flushes.pop_front(); 258 committed_flushes.pop_front();
240 } 259 }
241 260
242 virtual BufferType GetEmptyBuffer(std::size_t size) = 0; 261 virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
243 262
244protected: 263protected:
245 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, 264 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
246 std::unique_ptr<StreamBuffer> stream_buffer) 265 std::unique_ptr<StreamBuffer> stream_buffer)
247 : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)}, 266 : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {}
248 stream_buffer_handle{this->stream_buffer->GetHandle()} {}
249 267
250 ~BufferCache() = default; 268 ~BufferCache() = default;
251 269
252 virtual BufferType ToHandle(const OwnerBuffer& storage) = 0; 270 virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
253
254 virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
255
256 virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
257 const u8* data) = 0;
258
259 virtual void DownloadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
260 u8* data) = 0;
261
262 virtual void CopyBlock(const OwnerBuffer& src, const OwnerBuffer& dst, std::size_t src_offset,
263 std::size_t dst_offset, std::size_t size) = 0;
264 271
265 virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { 272 virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
266 return {}; 273 return {};
@@ -315,7 +322,7 @@ protected:
315 } 322 }
316 323
317private: 324private:
318 MapInterval* MapAddress(const OwnerBuffer& block, GPUVAddr gpu_addr, VAddr cpu_addr, 325 MapInterval* MapAddress(const Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr,
319 std::size_t size) { 326 std::size_t size) {
320 const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); 327 const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
321 if (overlaps.empty()) { 328 if (overlaps.empty()) {
@@ -323,11 +330,11 @@ private:
323 const VAddr cpu_addr_end = cpu_addr + size; 330 const VAddr cpu_addr_end = cpu_addr + size;
324 if (memory_manager.IsGranularRange(gpu_addr, size)) { 331 if (memory_manager.IsGranularRange(gpu_addr, size)) {
325 u8* host_ptr = memory_manager.GetPointer(gpu_addr); 332 u8* host_ptr = memory_manager.GetPointer(gpu_addr);
326 UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr); 333 block->Upload(block->Offset(cpu_addr), size, host_ptr);
327 } else { 334 } else {
328 staging_buffer.resize(size); 335 staging_buffer.resize(size);
329 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); 336 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
330 UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data()); 337 block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
331 } 338 }
332 return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); 339 return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
333 } 340 }
@@ -370,7 +377,7 @@ private:
370 return map; 377 return map;
371 } 378 }
372 379
373 void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end, 380 void UpdateBlock(const Buffer* block, VAddr start, VAddr end,
374 const VectorMapInterval& overlaps) { 381 const VectorMapInterval& overlaps) {
375 const IntervalType base_interval{start, end}; 382 const IntervalType base_interval{start, end};
376 IntervalSet interval_set{}; 383 IntervalSet interval_set{};
@@ -380,13 +387,13 @@ private:
380 interval_set.subtract(subtract); 387 interval_set.subtract(subtract);
381 } 388 }
382 for (auto& interval : interval_set) { 389 for (auto& interval : interval_set) {
383 std::size_t size = interval.upper() - interval.lower(); 390 const std::size_t size = interval.upper() - interval.lower();
384 if (size > 0) { 391 if (size == 0) {
385 staging_buffer.resize(size); 392 continue;
386 system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
387 UploadBlockData(block, block->GetOffset(interval.lower()), size,
388 staging_buffer.data());
389 } 393 }
394 staging_buffer.resize(size);
395 system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
396 block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
390 } 397 }
391 } 398 }
392 399
@@ -416,23 +423,27 @@ private:
416 } 423 }
417 424
418 void FlushMap(MapInterval* map) { 425 void FlushMap(MapInterval* map) {
426 const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS);
427 ASSERT_OR_EXECUTE(it != blocks.end(), return;);
428
429 std::shared_ptr<Buffer> block = it->second;
430
419 const std::size_t size = map->end - map->start; 431 const std::size_t size = map->end - map->start;
420 OwnerBuffer block = blocks[map->start >> block_page_bits];
421 staging_buffer.resize(size); 432 staging_buffer.resize(size);
422 DownloadBlockData(block, block->GetOffset(map->start), size, staging_buffer.data()); 433 block->Download(block->Offset(map->start), size, staging_buffer.data());
423 system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); 434 system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
424 map->MarkAsModified(false, 0); 435 map->MarkAsModified(false, 0);
425 } 436 }
426 437
427 BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, 438 template <typename Callable>
428 std::size_t alignment) { 439 BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) {
429 AlignBuffer(alignment); 440 AlignBuffer(alignment);
430 const std::size_t uploaded_offset = buffer_offset; 441 const std::size_t uploaded_offset = buffer_offset;
431 std::memcpy(buffer_ptr, raw_pointer, size); 442 callable(buffer_ptr);
432 443
433 buffer_ptr += size; 444 buffer_ptr += size;
434 buffer_offset += size; 445 buffer_offset += size;
435 return {stream_buffer_handle, uploaded_offset}; 446 return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
436 } 447 }
437 448
438 void AlignBuffer(std::size_t alignment) { 449 void AlignBuffer(std::size_t alignment) {
@@ -442,97 +453,89 @@ private:
442 buffer_offset = offset_aligned; 453 buffer_offset = offset_aligned;
443 } 454 }
444 455
445 OwnerBuffer EnlargeBlock(OwnerBuffer buffer) { 456 std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) {
446 const std::size_t old_size = buffer->GetSize(); 457 const std::size_t old_size = buffer->Size();
447 const std::size_t new_size = old_size + block_page_size; 458 const std::size_t new_size = old_size + BLOCK_PAGE_SIZE;
448 const VAddr cpu_addr = buffer->GetCpuAddr(); 459 const VAddr cpu_addr = buffer->CpuAddr();
449 OwnerBuffer new_buffer = CreateBlock(cpu_addr, new_size); 460 std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size);
450 CopyBlock(buffer, new_buffer, 0, 0, old_size); 461 new_buffer->CopyFrom(*buffer, 0, 0, old_size);
451 buffer->SetEpoch(epoch); 462 QueueDestruction(std::move(buffer));
452 pending_destruction.push_back(buffer); 463
453 const VAddr cpu_addr_end = cpu_addr + new_size - 1; 464 const VAddr cpu_addr_end = cpu_addr + new_size - 1;
454 u64 page_start = cpu_addr >> block_page_bits; 465 const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
455 const u64 page_end = cpu_addr_end >> block_page_bits; 466 for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
456 while (page_start <= page_end) { 467 blocks.insert_or_assign(page_start, new_buffer);
457 blocks[page_start] = new_buffer;
458 ++page_start;
459 } 468 }
469
460 return new_buffer; 470 return new_buffer;
461 } 471 }
462 472
463 OwnerBuffer MergeBlocks(OwnerBuffer first, OwnerBuffer second) { 473 std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first,
464 const std::size_t size_1 = first->GetSize(); 474 std::shared_ptr<Buffer> second) {
465 const std::size_t size_2 = second->GetSize(); 475 const std::size_t size_1 = first->Size();
466 const VAddr first_addr = first->GetCpuAddr(); 476 const std::size_t size_2 = second->Size();
467 const VAddr second_addr = second->GetCpuAddr(); 477 const VAddr first_addr = first->CpuAddr();
478 const VAddr second_addr = second->CpuAddr();
468 const VAddr new_addr = std::min(first_addr, second_addr); 479 const VAddr new_addr = std::min(first_addr, second_addr);
469 const std::size_t new_size = size_1 + size_2; 480 const std::size_t new_size = size_1 + size_2;
470 OwnerBuffer new_buffer = CreateBlock(new_addr, new_size); 481
471 CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); 482 std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size);
472 CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2); 483 new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1);
473 first->SetEpoch(epoch); 484 new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2);
474 second->SetEpoch(epoch); 485 QueueDestruction(std::move(first));
475 pending_destruction.push_back(first); 486 QueueDestruction(std::move(second));
476 pending_destruction.push_back(second); 487
477 const VAddr cpu_addr_end = new_addr + new_size - 1; 488 const VAddr cpu_addr_end = new_addr + new_size - 1;
478 u64 page_start = new_addr >> block_page_bits; 489 const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
479 const u64 page_end = cpu_addr_end >> block_page_bits; 490 for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
480 while (page_start <= page_end) { 491 blocks.insert_or_assign(page_start, new_buffer);
481 blocks[page_start] = new_buffer;
482 ++page_start;
483 } 492 }
484 return new_buffer; 493 return new_buffer;
485 } 494 }
486 495
487 OwnerBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) { 496 Buffer* GetBlock(VAddr cpu_addr, std::size_t size) {
488 OwnerBuffer found; 497 std::shared_ptr<Buffer> found;
498
489 const VAddr cpu_addr_end = cpu_addr + size - 1; 499 const VAddr cpu_addr_end = cpu_addr + size - 1;
490 u64 page_start = cpu_addr >> block_page_bits; 500 const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS;
491 const u64 page_end = cpu_addr_end >> block_page_bits; 501 for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) {
492 while (page_start <= page_end) {
493 auto it = blocks.find(page_start); 502 auto it = blocks.find(page_start);
494 if (it == blocks.end()) { 503 if (it == blocks.end()) {
495 if (found) { 504 if (found) {
496 found = EnlargeBlock(found); 505 found = EnlargeBlock(found);
497 } else { 506 continue;
498 const VAddr start_addr = (page_start << block_page_bits);
499 found = CreateBlock(start_addr, block_page_size);
500 blocks[page_start] = found;
501 }
502 } else {
503 if (found) {
504 if (found == it->second) {
505 ++page_start;
506 continue;
507 }
508 found = MergeBlocks(found, it->second);
509 } else {
510 found = it->second;
511 } 507 }
508 const VAddr start_addr = page_start << BLOCK_PAGE_BITS;
509 found = CreateBlock(start_addr, BLOCK_PAGE_SIZE);
510 blocks.insert_or_assign(page_start, found);
511 continue;
512 }
513 if (!found) {
514 found = it->second;
515 continue;
516 }
517 if (found != it->second) {
518 found = MergeBlocks(std::move(found), it->second);
512 } 519 }
513 ++page_start;
514 } 520 }
515 return found; 521 return found.get();
516 } 522 }
517 523
518 void MarkRegionAsWritten(const VAddr start, const VAddr end) { 524 void MarkRegionAsWritten(VAddr start, VAddr end) {
519 u64 page_start = start >> write_page_bit; 525 const u64 page_end = end >> WRITE_PAGE_BIT;
520 const u64 page_end = end >> write_page_bit; 526 for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
521 while (page_start <= page_end) {
522 auto it = written_pages.find(page_start); 527 auto it = written_pages.find(page_start);
523 if (it != written_pages.end()) { 528 if (it != written_pages.end()) {
524 it->second = it->second + 1; 529 it->second = it->second + 1;
525 } else { 530 } else {
526 written_pages[page_start] = 1; 531 written_pages.insert_or_assign(page_start, 1);
527 } 532 }
528 ++page_start;
529 } 533 }
530 } 534 }
531 535
532 void UnmarkRegionAsWritten(const VAddr start, const VAddr end) { 536 void UnmarkRegionAsWritten(VAddr start, VAddr end) {
533 u64 page_start = start >> write_page_bit; 537 const u64 page_end = end >> WRITE_PAGE_BIT;
534 const u64 page_end = end >> write_page_bit; 538 for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
535 while (page_start <= page_end) {
536 auto it = written_pages.find(page_start); 539 auto it = written_pages.find(page_start);
537 if (it != written_pages.end()) { 540 if (it != written_pages.end()) {
538 if (it->second > 1) { 541 if (it->second > 1) {
@@ -541,22 +544,24 @@ private:
541 written_pages.erase(it); 544 written_pages.erase(it);
542 } 545 }
543 } 546 }
544 ++page_start;
545 } 547 }
546 } 548 }
547 549
548 bool IsRegionWritten(const VAddr start, const VAddr end) const { 550 bool IsRegionWritten(VAddr start, VAddr end) const {
549 u64 page_start = start >> write_page_bit; 551 const u64 page_end = end >> WRITE_PAGE_BIT;
550 const u64 page_end = end >> write_page_bit; 552 for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) {
551 while (page_start <= page_end) {
552 if (written_pages.count(page_start) > 0) { 553 if (written_pages.count(page_start) > 0) {
553 return true; 554 return true;
554 } 555 }
555 ++page_start;
556 } 556 }
557 return false; 557 return false;
558 } 558 }
559 559
560 void QueueDestruction(std::shared_ptr<Buffer> buffer) {
561 buffer->SetEpoch(epoch);
562 pending_destruction.push(std::move(buffer));
563 }
564
560 void MarkForAsyncFlush(MapInterval* map) { 565 void MarkForAsyncFlush(MapInterval* map) {
561 if (!uncommitted_flushes) { 566 if (!uncommitted_flushes) {
562 uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>(); 567 uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
@@ -568,9 +573,7 @@ private:
568 Core::System& system; 573 Core::System& system;
569 574
570 std::unique_ptr<StreamBuffer> stream_buffer; 575 std::unique_ptr<StreamBuffer> stream_buffer;
571 BufferType stream_buffer_handle{}; 576 BufferType stream_buffer_handle;
572
573 bool invalidated = false;
574 577
575 u8* buffer_ptr = nullptr; 578 u8* buffer_ptr = nullptr;
576 u64 buffer_offset = 0; 579 u64 buffer_offset = 0;
@@ -580,18 +583,15 @@ private:
580 boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>> 583 boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
581 mapped_addresses; 584 mapped_addresses;
582 585
583 static constexpr u64 write_page_bit = 11;
584 std::unordered_map<u64, u32> written_pages; 586 std::unordered_map<u64, u32> written_pages;
587 std::unordered_map<u64, std::shared_ptr<Buffer>> blocks;
585 588
586 static constexpr u64 block_page_bits = 21; 589 std::queue<std::shared_ptr<Buffer>> pending_destruction;
587 static constexpr u64 block_page_size = 1ULL << block_page_bits;
588 std::unordered_map<u64, OwnerBuffer> blocks;
589
590 std::list<OwnerBuffer> pending_destruction;
591 u64 epoch = 0; 590 u64 epoch = 0;
592 u64 modified_ticks = 0; 591 u64 modified_ticks = 0;
593 592
594 std::vector<u8> staging_buffer; 593 std::vector<u8> staging_buffer;
594
595 std::list<MapInterval*> marked_for_unregister; 595 std::list<MapInterval*> marked_for_unregister;
596 596
597 std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes; 597 std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index ebe139504..f46e81bb7 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -93,6 +93,7 @@ public:
93 virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; 93 virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
94 virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, 94 virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
95 u64 offset) const = 0; 95 u64 offset) const = 0;
96 virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
96 virtual u32 GetBoundBuffer() const = 0; 97 virtual u32 GetBoundBuffer() const = 0;
97 98
98 virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; 99 virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index f6237fc6a..a82b06a38 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -92,8 +92,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
92 ASSERT(stage == ShaderType::Compute); 92 ASSERT(stage == ShaderType::Compute);
93 const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; 93 const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
94 const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; 94 const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
95 return AccessSampler(memory_manager.Read<u32>(tex_info_address));
96}
95 97
96 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; 98SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
99 const Texture::TextureHandle tex_handle{handle};
97 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); 100 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
98 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); 101 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
99 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); 102 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 18ceedfaf..b7f668d88 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -219,6 +219,8 @@ public:
219 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, 219 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
220 u64 offset) const override; 220 u64 offset) const override;
221 221
222 SamplerDescriptor AccessSampler(u32 handle) const override;
223
222 u32 GetBoundBuffer() const override { 224 u32 GetBoundBuffer() const override {
223 return regs.tex_cb_index; 225 return regs.tex_cb_index;
224 } 226 }
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 004f6b261..c01436295 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -25,9 +25,8 @@ constexpr u32 MacroRegistersStart = 0xE00;
25Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 25Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
26 MemoryManager& memory_manager) 26 MemoryManager& memory_manager)
27 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, 27 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
28 macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { 28 macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} {
29 dirty.flags.flip(); 29 dirty.flags.flip();
30
31 InitializeRegisterDefaults(); 30 InitializeRegisterDefaults();
32} 31}
33 32
@@ -106,7 +105,11 @@ void Maxwell3D::InitializeRegisterDefaults() {
106 regs.rasterize_enable = 1; 105 regs.rasterize_enable = 1;
107 regs.rt_separate_frag_data = 1; 106 regs.rt_separate_frag_data = 1;
108 regs.framebuffer_srgb = 1; 107 regs.framebuffer_srgb = 1;
108 regs.line_width_aliased = 1.0f;
109 regs.line_width_smooth = 1.0f;
109 regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise; 110 regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise;
111 regs.polygon_mode_back = Maxwell3D::Regs::PolygonMode::Fill;
112 regs.polygon_mode_front = Maxwell3D::Regs::PolygonMode::Fill;
110 113
111 shadow_state = regs; 114 shadow_state = regs;
112 115
@@ -116,7 +119,7 @@ void Maxwell3D::InitializeRegisterDefaults() {
116 mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; 119 mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
117} 120}
118 121
119void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) { 122void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) {
120 // Reset the current macro. 123 // Reset the current macro.
121 executing_macro = 0; 124 executing_macro = 0;
122 125
@@ -125,7 +128,7 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3
125 ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); 128 ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
126 129
127 // Execute the current macro. 130 // Execute the current macro.
128 macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters); 131 macro_engine->Execute(*this, macro_positions[entry], parameters);
129 if (mme_draw.current_mode != MMEDrawMode::Undefined) { 132 if (mme_draw.current_mode != MMEDrawMode::Undefined) {
130 FlushMMEInlineDraw(); 133 FlushMMEInlineDraw();
131 } 134 }
@@ -161,7 +164,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
161 164
162 // Call the macro when there are no more parameters in the command buffer 165 // Call the macro when there are no more parameters in the command buffer
163 if (is_last_call) { 166 if (is_last_call) {
164 CallMacroMethod(executing_macro, macro_params.size(), macro_params.data()); 167 CallMacroMethod(executing_macro, macro_params);
165 macro_params.clear(); 168 macro_params.clear();
166 } 169 }
167 return; 170 return;
@@ -197,7 +200,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
197 break; 200 break;
198 } 201 }
199 case MAXWELL3D_REG_INDEX(macros.data): { 202 case MAXWELL3D_REG_INDEX(macros.data): {
200 ProcessMacroUpload(arg); 203 macro_engine->AddCode(regs.macros.upload_address, arg);
201 break; 204 break;
202 } 205 }
203 case MAXWELL3D_REG_INDEX(macros.bind): { 206 case MAXWELL3D_REG_INDEX(macros.bind): {
@@ -306,7 +309,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
306 309
307 // Call the macro when there are no more parameters in the command buffer 310 // Call the macro when there are no more parameters in the command buffer
308 if (amount == methods_pending) { 311 if (amount == methods_pending) {
309 CallMacroMethod(executing_macro, macro_params.size(), macro_params.data()); 312 CallMacroMethod(executing_macro, macro_params);
310 macro_params.clear(); 313 macro_params.clear();
311 } 314 }
312 return; 315 return;
@@ -420,9 +423,7 @@ void Maxwell3D::FlushMMEInlineDraw() {
420} 423}
421 424
422void Maxwell3D::ProcessMacroUpload(u32 data) { 425void Maxwell3D::ProcessMacroUpload(u32 data) {
423 ASSERT_MSG(regs.macros.upload_address < macro_memory.size(), 426 macro_engine->AddCode(regs.macros.upload_address++, data);
424 "upload_address exceeded macro_memory size!");
425 macro_memory[regs.macros.upload_address++] = data;
426} 427}
427 428
428void Maxwell3D::ProcessMacroBind(u32 data) { 429void Maxwell3D::ProcessMacroBind(u32 data) {
@@ -739,8 +740,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
739 const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; 740 const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
740 const auto& tex_info_buffer = shader.const_buffers[const_buffer]; 741 const auto& tex_info_buffer = shader.const_buffers[const_buffer];
741 const GPUVAddr tex_info_address = tex_info_buffer.address + offset; 742 const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
743 return AccessSampler(memory_manager.Read<u32>(tex_info_address));
744}
742 745
743 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; 746SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
747 const Texture::TextureHandle tex_handle{handle};
744 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); 748 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
745 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); 749 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
746 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); 750 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 05dd6b39b..ef1618990 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -23,7 +23,7 @@
23#include "video_core/engines/engine_upload.h" 23#include "video_core/engines/engine_upload.h"
24#include "video_core/engines/shader_type.h" 24#include "video_core/engines/shader_type.h"
25#include "video_core/gpu.h" 25#include "video_core/gpu.h"
26#include "video_core/macro_interpreter.h" 26#include "video_core/macro/macro.h"
27#include "video_core/textures/texture.h" 27#include "video_core/textures/texture.h"
28 28
29namespace Core { 29namespace Core {
@@ -598,6 +598,7 @@ public:
598 BitField<4, 3, u32> block_height; 598 BitField<4, 3, u32> block_height;
599 BitField<8, 3, u32> block_depth; 599 BitField<8, 3, u32> block_depth;
600 BitField<12, 1, InvMemoryLayout> type; 600 BitField<12, 1, InvMemoryLayout> type;
601 BitField<16, 1, u32> is_3d;
601 } memory_layout; 602 } memory_layout;
602 union { 603 union {
603 BitField<0, 16, u32> layers; 604 BitField<0, 16, u32> layers;
@@ -1403,6 +1404,8 @@ public:
1403 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, 1404 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
1404 u64 offset) const override; 1405 u64 offset) const override;
1405 1406
1407 SamplerDescriptor AccessSampler(u32 handle) const override;
1408
1406 u32 GetBoundBuffer() const override { 1409 u32 GetBoundBuffer() const override {
1407 return regs.tex_cb_index; 1410 return regs.tex_cb_index;
1408 } 1411 }
@@ -1411,17 +1414,16 @@ public:
1411 1414
1412 const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; 1415 const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
1413 1416
1414 /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than 1417 bool ShouldExecute() const {
1415 /// we've seen used. 1418 return execute_on;
1416 using MacroMemory = std::array<u32, 0x40000>; 1419 }
1417 1420
1418 /// Gets a reference to macro memory. 1421 VideoCore::RasterizerInterface& GetRasterizer() {
1419 const MacroMemory& GetMacroMemory() const { 1422 return rasterizer;
1420 return macro_memory;
1421 } 1423 }
1422 1424
1423 bool ShouldExecute() const { 1425 const VideoCore::RasterizerInterface& GetRasterizer() const {
1424 return execute_on; 1426 return rasterizer;
1425 } 1427 }
1426 1428
1427 /// Notify a memory write has happened. 1429 /// Notify a memory write has happened.
@@ -1468,16 +1470,13 @@ private:
1468 1470
1469 std::array<bool, Regs::NUM_REGS> mme_inline{}; 1471 std::array<bool, Regs::NUM_REGS> mme_inline{};
1470 1472
1471 /// Memory for macro code
1472 MacroMemory macro_memory;
1473
1474 /// Macro method that is currently being executed / being fed parameters. 1473 /// Macro method that is currently being executed / being fed parameters.
1475 u32 executing_macro = 0; 1474 u32 executing_macro = 0;
1476 /// Parameters that have been submitted to the macro call so far. 1475 /// Parameters that have been submitted to the macro call so far.
1477 std::vector<u32> macro_params; 1476 std::vector<u32> macro_params;
1478 1477
1479 /// Interpreter for the macro codes uploaded to the GPU. 1478 /// Interpreter for the macro codes uploaded to the GPU.
1480 MacroInterpreter macro_interpreter; 1479 std::unique_ptr<MacroEngine> macro_engine;
1481 1480
1482 static constexpr u32 null_cb_data = 0xFFFFFFFF; 1481 static constexpr u32 null_cb_data = 0xFFFFFFFF;
1483 struct { 1482 struct {
@@ -1506,7 +1505,7 @@ private:
1506 * @param num_parameters Number of arguments 1505 * @param num_parameters Number of arguments
1507 * @param parameters Arguments to the method call 1506 * @param parameters Arguments to the method call
1508 */ 1507 */
1509 void CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters); 1508 void CallMacroMethod(u32 method, const std::vector<u32>& parameters);
1510 1509
1511 /// Handles writes to the macro uploading register. 1510 /// Handles writes to the macro uploading register.
1512 void ProcessMacroUpload(u32 data); 1511 void ProcessMacroUpload(u32 data);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index e7cb87589..d374b73cf 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -661,6 +661,10 @@ union Instruction {
661 constexpr Instruction(u64 value) : value{value} {} 661 constexpr Instruction(u64 value) : value{value} {}
662 constexpr Instruction(const Instruction& instr) : value(instr.value) {} 662 constexpr Instruction(const Instruction& instr) : value(instr.value) {}
663 663
664 constexpr bool Bit(u64 offset) const {
665 return ((value >> offset) & 1) != 0;
666 }
667
664 BitField<0, 8, Register> gpr0; 668 BitField<0, 8, Register> gpr0;
665 BitField<8, 8, Register> gpr8; 669 BitField<8, 8, Register> gpr8;
666 union { 670 union {
@@ -1874,7 +1878,9 @@ public:
1874 HSETP2_C, 1878 HSETP2_C,
1875 HSETP2_R, 1879 HSETP2_R,
1876 HSETP2_IMM, 1880 HSETP2_IMM,
1881 HSET2_C,
1877 HSET2_R, 1882 HSET2_R,
1883 HSET2_IMM,
1878 POPC_C, 1884 POPC_C,
1879 POPC_R, 1885 POPC_R,
1880 POPC_IMM, 1886 POPC_IMM,
@@ -2194,7 +2200,9 @@ private:
2194 INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), 2200 INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
2195 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), 2201 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
2196 INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), 2202 INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
2203 INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"),
2197 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), 2204 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
2205 INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
2198 INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), 2206 INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
2199 INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), 2207 INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
2200 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), 2208 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
new file mode 100644
index 000000000..ef7dad349
--- /dev/null
+++ b/src/video_core/macro/macro.cpp
@@ -0,0 +1,72 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <boost/container_hash/hash.hpp>
6#include "common/assert.h"
7#include "common/logging/log.h"
8#include "core/settings.h"
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/macro/macro.h"
11#include "video_core/macro/macro_hle.h"
12#include "video_core/macro/macro_interpreter.h"
13#include "video_core/macro/macro_jit_x64.h"
14
15namespace Tegra {
16
17MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d)
18 : hle_macros{std::make_unique<Tegra::HLEMacro>(maxwell3d)} {}
19
20MacroEngine::~MacroEngine() = default;
21
22void MacroEngine::AddCode(u32 method, u32 data) {
23 uploaded_macro_code[method].push_back(data);
24}
25
26void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
27 const std::vector<u32>& parameters) {
28 auto compiled_macro = macro_cache.find(method);
29 if (compiled_macro != macro_cache.end()) {
30 const auto& cache_info = compiled_macro->second;
31 if (cache_info.has_hle_program) {
32 cache_info.hle_program->Execute(parameters, method);
33 } else {
34 cache_info.lle_program->Execute(parameters, method);
35 }
36 } else {
37 // Macro not compiled, check if it's uploaded and if so, compile it
38 auto macro_code = uploaded_macro_code.find(method);
39 if (macro_code == uploaded_macro_code.end()) {
40 UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method);
41 return;
42 }
43 auto& cache_info = macro_cache[method];
44 cache_info.hash = boost::hash_value(macro_code->second);
45 cache_info.lle_program = Compile(macro_code->second);
46
47 auto hle_program = hle_macros->GetHLEProgram(cache_info.hash);
48 if (hle_program.has_value()) {
49 cache_info.has_hle_program = true;
50 cache_info.hle_program = std::move(hle_program.value());
51 }
52
53 if (cache_info.has_hle_program) {
54 cache_info.hle_program->Execute(parameters, method);
55 } else {
56 cache_info.lle_program->Execute(parameters, method);
57 }
58 }
59}
60
61std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d) {
62 if (Settings::values.disable_macro_jit) {
63 return std::make_unique<MacroInterpreter>(maxwell3d);
64 }
65#ifdef ARCHITECTURE_x86_64
66 return std::make_unique<MacroJITx64>(maxwell3d);
67#else
68 return std::make_unique<MacroInterpreter>(maxwell3d);
69#endif
70}
71
72} // namespace Tegra
diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h
new file mode 100644
index 000000000..4d00b84b0
--- /dev/null
+++ b/src/video_core/macro/macro.h
@@ -0,0 +1,141 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <unordered_map>
9#include <vector>
10#include "common/bit_field.h"
11#include "common/common_types.h"
12
13namespace Tegra {
14
15namespace Engines {
16class Maxwell3D;
17}
18
19namespace Macro {
20constexpr std::size_t NUM_MACRO_REGISTERS = 8;
21enum class Operation : u32 {
22 ALU = 0,
23 AddImmediate = 1,
24 ExtractInsert = 2,
25 ExtractShiftLeftImmediate = 3,
26 ExtractShiftLeftRegister = 4,
27 Read = 5,
28 Unused = 6, // This operation doesn't seem to be a valid encoding.
29 Branch = 7,
30};
31
32enum class ALUOperation : u32 {
33 Add = 0,
34 AddWithCarry = 1,
35 Subtract = 2,
36 SubtractWithBorrow = 3,
37 // Operations 4-7 don't seem to be valid encodings.
38 Xor = 8,
39 Or = 9,
40 And = 10,
41 AndNot = 11,
42 Nand = 12
43};
44
45enum class ResultOperation : u32 {
46 IgnoreAndFetch = 0,
47 Move = 1,
48 MoveAndSetMethod = 2,
49 FetchAndSend = 3,
50 MoveAndSend = 4,
51 FetchAndSetMethod = 5,
52 MoveAndSetMethodFetchAndSend = 6,
53 MoveAndSetMethodSend = 7
54};
55
56enum class BranchCondition : u32 {
57 Zero = 0,
58 NotZero = 1,
59};
60
61union Opcode {
62 u32 raw;
63 BitField<0, 3, Operation> operation;
64 BitField<4, 3, ResultOperation> result_operation;
65 BitField<4, 1, BranchCondition> branch_condition;
66 // If set on a branch, then the branch doesn't have a delay slot.
67 BitField<5, 1, u32> branch_annul;
68 BitField<7, 1, u32> is_exit;
69 BitField<8, 3, u32> dst;
70 BitField<11, 3, u32> src_a;
71 BitField<14, 3, u32> src_b;
72 // The signed immediate overlaps the second source operand and the alu operation.
73 BitField<14, 18, s32> immediate;
74
75 BitField<17, 5, ALUOperation> alu_operation;
76
77 // Bitfield instructions data
78 BitField<17, 5, u32> bf_src_bit;
79 BitField<22, 5, u32> bf_size;
80 BitField<27, 5, u32> bf_dst_bit;
81
82 u32 GetBitfieldMask() const {
83 return (1 << bf_size) - 1;
84 }
85
86 s32 GetBranchTarget() const {
87 return static_cast<s32>(immediate * sizeof(u32));
88 }
89};
90
91union MethodAddress {
92 u32 raw;
93 BitField<0, 12, u32> address;
94 BitField<12, 6, u32> increment;
95};
96
97} // namespace Macro
98
99class HLEMacro;
100
101class CachedMacro {
102public:
103 virtual ~CachedMacro() = default;
104 /**
105 * Executes the macro code with the specified input parameters.
106 * @param code The macro byte code to execute
107 * @param parameters The parameters of the macro
108 */
109 virtual void Execute(const std::vector<u32>& parameters, u32 method) = 0;
110};
111
112class MacroEngine {
113public:
114 explicit MacroEngine(Engines::Maxwell3D& maxwell3d);
115 virtual ~MacroEngine();
116
117 // Store the uploaded macro code to compile them when they're called.
118 void AddCode(u32 method, u32 data);
119
120 // Compiles the macro if its not in the cache, and executes the compiled macro
121 void Execute(Engines::Maxwell3D& maxwell3d, u32 method, const std::vector<u32>& parameters);
122
123protected:
124 virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0;
125
126private:
127 struct CacheInfo {
128 std::unique_ptr<CachedMacro> lle_program{};
129 std::unique_ptr<CachedMacro> hle_program{};
130 u64 hash{};
131 bool has_hle_program{};
132 };
133
134 std::unordered_map<u32, CacheInfo> macro_cache;
135 std::unordered_map<u32, std::vector<u32>> uploaded_macro_code;
136 std::unique_ptr<HLEMacro> hle_macros;
137};
138
139std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d);
140
141} // namespace Tegra
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
new file mode 100644
index 000000000..410f99018
--- /dev/null
+++ b/src/video_core/macro/macro_hle.cpp
@@ -0,0 +1,113 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <vector>
7#include "video_core/engines/maxwell_3d.h"
8#include "video_core/macro/macro_hle.h"
9#include "video_core/rasterizer_interface.h"
10
11namespace Tegra {
12
13namespace {
14// HLE'd functions
15static void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d,
16 const std::vector<u32>& parameters) {
17 const u32 instance_count = parameters[2] & maxwell3d.GetRegisterValue(0xD1B);
18
19 maxwell3d.regs.draw.topology.Assign(
20 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0] &
21 ~(0x3ffffff << 26)));
22 maxwell3d.regs.vb_base_instance = parameters[5];
23 maxwell3d.mme_draw.instance_count = instance_count;
24 maxwell3d.regs.vb_element_base = parameters[3];
25 maxwell3d.regs.index_array.count = parameters[1];
26 maxwell3d.regs.index_array.first = parameters[4];
27
28 if (maxwell3d.ShouldExecute()) {
29 maxwell3d.GetRasterizer().Draw(true, true);
30 }
31 maxwell3d.regs.index_array.count = 0;
32 maxwell3d.mme_draw.instance_count = 0;
33 maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
34}
35
36static void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d,
37 const std::vector<u32>& parameters) {
38 const u32 count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
39
40 maxwell3d.regs.vertex_buffer.first = parameters[3];
41 maxwell3d.regs.vertex_buffer.count = parameters[1];
42 maxwell3d.regs.vb_base_instance = parameters[4];
43 maxwell3d.regs.draw.topology.Assign(
44 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
45 maxwell3d.mme_draw.instance_count = count;
46
47 if (maxwell3d.ShouldExecute()) {
48 maxwell3d.GetRasterizer().Draw(false, true);
49 }
50 maxwell3d.regs.vertex_buffer.count = 0;
51 maxwell3d.mme_draw.instance_count = 0;
52 maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
53}
54
55static void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d,
56 const std::vector<u32>& parameters) {
57 const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]);
58 const u32 element_base = parameters[4];
59 const u32 base_instance = parameters[5];
60 maxwell3d.regs.index_array.first = parameters[3];
61 maxwell3d.regs.reg_array[0x446] = element_base; // vertex id base?
62 maxwell3d.regs.index_array.count = parameters[1];
63 maxwell3d.regs.vb_element_base = element_base;
64 maxwell3d.regs.vb_base_instance = base_instance;
65 maxwell3d.mme_draw.instance_count = instance_count;
66 maxwell3d.CallMethodFromMME(0x8e3, 0x640);
67 maxwell3d.CallMethodFromMME(0x8e4, element_base);
68 maxwell3d.CallMethodFromMME(0x8e5, base_instance);
69 maxwell3d.regs.draw.topology.Assign(
70 static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
71 if (maxwell3d.ShouldExecute()) {
72 maxwell3d.GetRasterizer().Draw(true, true);
73 }
74 maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base?
75 maxwell3d.regs.index_array.count = 0;
76 maxwell3d.regs.vb_element_base = 0x0;
77 maxwell3d.regs.vb_base_instance = 0x0;
78 maxwell3d.mme_draw.instance_count = 0;
79 maxwell3d.CallMethodFromMME(0x8e3, 0x640);
80 maxwell3d.CallMethodFromMME(0x8e4, 0x0);
81 maxwell3d.CallMethodFromMME(0x8e5, 0x0);
82 maxwell3d.mme_draw.current_mode = Engines::Maxwell3D::MMEDrawMode::Undefined;
83}
84} // namespace
85
86constexpr std::array<std::pair<u64, HLEFunction>, 3> hle_funcs{{
87 std::make_pair<u64, HLEFunction>(0x771BB18C62444DA0, &HLE_771BB18C62444DA0),
88 std::make_pair<u64, HLEFunction>(0x0D61FC9FAAC9FCAD, &HLE_0D61FC9FAAC9FCAD),
89 std::make_pair<u64, HLEFunction>(0x0217920100488FF7, &HLE_0217920100488FF7),
90}};
91
92HLEMacro::HLEMacro(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
93HLEMacro::~HLEMacro() = default;
94
95std::optional<std::unique_ptr<CachedMacro>> HLEMacro::GetHLEProgram(u64 hash) const {
96 const auto it = std::find_if(hle_funcs.cbegin(), hle_funcs.cend(),
97 [hash](const auto& pair) { return pair.first == hash; });
98 if (it == hle_funcs.end()) {
99 return std::nullopt;
100 }
101 return std::make_unique<HLEMacroImpl>(maxwell3d, it->second);
102}
103
104HLEMacroImpl::~HLEMacroImpl() = default;
105
106HLEMacroImpl::HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func)
107 : maxwell3d(maxwell3d), func(func) {}
108
109void HLEMacroImpl::Execute(const std::vector<u32>& parameters, u32 method) {
110 func(maxwell3d, parameters);
111}
112
113} // namespace Tegra
diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h
new file mode 100644
index 000000000..37af875a0
--- /dev/null
+++ b/src/video_core/macro/macro_hle.h
@@ -0,0 +1,44 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <optional>
9#include <vector>
10#include "common/common_types.h"
11#include "video_core/macro/macro.h"
12
13namespace Tegra {
14
15namespace Engines {
16class Maxwell3D;
17}
18
19using HLEFunction = void (*)(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& parameters);
20
21class HLEMacro {
22public:
23 explicit HLEMacro(Engines::Maxwell3D& maxwell3d);
24 ~HLEMacro();
25
26 std::optional<std::unique_ptr<CachedMacro>> GetHLEProgram(u64 hash) const;
27
28private:
29 Engines::Maxwell3D& maxwell3d;
30};
31
32class HLEMacroImpl : public CachedMacro {
33public:
34 explicit HLEMacroImpl(Engines::Maxwell3D& maxwell3d, HLEFunction func);
35 ~HLEMacroImpl();
36
37 void Execute(const std::vector<u32>& parameters, u32 method) override;
38
39private:
40 Engines::Maxwell3D& maxwell3d;
41 HLEFunction func;
42};
43
44} // namespace Tegra
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp
index 947364928..aa5256419 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -1,4 +1,4 @@
1// Copyright 2018 yuzu Emulator Project 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
@@ -6,109 +6,47 @@
6#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "common/microprofile.h" 7#include "common/microprofile.h"
8#include "video_core/engines/maxwell_3d.h" 8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/macro_interpreter.h" 9#include "video_core/macro/macro_interpreter.h"
10 10
11MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); 11MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
12 12
13namespace Tegra { 13namespace Tegra {
14namespace { 14MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d)
15enum class Operation : u32 { 15 : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
16 ALU = 0,
17 AddImmediate = 1,
18 ExtractInsert = 2,
19 ExtractShiftLeftImmediate = 3,
20 ExtractShiftLeftRegister = 4,
21 Read = 5,
22 Unused = 6, // This operation doesn't seem to be a valid encoding.
23 Branch = 7,
24};
25} // Anonymous namespace
26
27enum class MacroInterpreter::ALUOperation : u32 {
28 Add = 0,
29 AddWithCarry = 1,
30 Subtract = 2,
31 SubtractWithBorrow = 3,
32 // Operations 4-7 don't seem to be valid encodings.
33 Xor = 8,
34 Or = 9,
35 And = 10,
36 AndNot = 11,
37 Nand = 12
38};
39
40enum class MacroInterpreter::ResultOperation : u32 {
41 IgnoreAndFetch = 0,
42 Move = 1,
43 MoveAndSetMethod = 2,
44 FetchAndSend = 3,
45 MoveAndSend = 4,
46 FetchAndSetMethod = 5,
47 MoveAndSetMethodFetchAndSend = 6,
48 MoveAndSetMethodSend = 7
49};
50
51enum class MacroInterpreter::BranchCondition : u32 {
52 Zero = 0,
53 NotZero = 1,
54};
55
56union MacroInterpreter::Opcode {
57 u32 raw;
58 BitField<0, 3, Operation> operation;
59 BitField<4, 3, ResultOperation> result_operation;
60 BitField<4, 1, BranchCondition> branch_condition;
61 // If set on a branch, then the branch doesn't have a delay slot.
62 BitField<5, 1, u32> branch_annul;
63 BitField<7, 1, u32> is_exit;
64 BitField<8, 3, u32> dst;
65 BitField<11, 3, u32> src_a;
66 BitField<14, 3, u32> src_b;
67 // The signed immediate overlaps the second source operand and the alu operation.
68 BitField<14, 18, s32> immediate;
69
70 BitField<17, 5, ALUOperation> alu_operation;
71
72 // Bitfield instructions data
73 BitField<17, 5, u32> bf_src_bit;
74 BitField<22, 5, u32> bf_size;
75 BitField<27, 5, u32> bf_dst_bit;
76
77 u32 GetBitfieldMask() const {
78 return (1 << bf_size) - 1;
79 }
80 16
81 s32 GetBranchTarget() const { 17std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) {
82 return static_cast<s32>(immediate * sizeof(u32)); 18 return std::make_unique<MacroInterpreterImpl>(maxwell3d, code);
83 } 19}
84};
85 20
86MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} 21MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d,
22 const std::vector<u32>& code)
23 : maxwell3d(maxwell3d), code(code) {}
87 24
88void MacroInterpreter::Execute(u32 offset, std::size_t num_parameters, const u32* parameters) { 25void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 method) {
89 MICROPROFILE_SCOPE(MacroInterp); 26 MICROPROFILE_SCOPE(MacroInterp);
90 Reset(); 27 Reset();
91 28
92 registers[1] = parameters[0]; 29 registers[1] = parameters[0];
30 num_parameters = parameters.size();
93 31
94 if (num_parameters > parameters_capacity) { 32 if (num_parameters > parameters_capacity) {
95 parameters_capacity = num_parameters; 33 parameters_capacity = num_parameters;
96 this->parameters = std::make_unique<u32[]>(num_parameters); 34 this->parameters = std::make_unique<u32[]>(num_parameters);
97 } 35 }
98 std::memcpy(this->parameters.get(), parameters, num_parameters * sizeof(u32)); 36 std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32));
99 this->num_parameters = num_parameters; 37 this->num_parameters = num_parameters;
100 38
101 // Execute the code until we hit an exit condition. 39 // Execute the code until we hit an exit condition.
102 bool keep_executing = true; 40 bool keep_executing = true;
103 while (keep_executing) { 41 while (keep_executing) {
104 keep_executing = Step(offset, false); 42 keep_executing = Step(false);
105 } 43 }
106 44
107 // Assert the the macro used all the input parameters 45 // Assert the the macro used all the input parameters
108 ASSERT(next_parameter_index == num_parameters); 46 ASSERT(next_parameter_index == num_parameters);
109} 47}
110 48
111void MacroInterpreter::Reset() { 49void MacroInterpreterImpl::Reset() {
112 registers = {}; 50 registers = {};
113 pc = 0; 51 pc = 0;
114 delayed_pc = {}; 52 delayed_pc = {};
@@ -120,10 +58,10 @@ void MacroInterpreter::Reset() {
120 carry_flag = false; 58 carry_flag = false;
121} 59}
122 60
123bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { 61bool MacroInterpreterImpl::Step(bool is_delay_slot) {
124 u32 base_address = pc; 62 u32 base_address = pc;
125 63
126 Opcode opcode = GetOpcode(offset); 64 Macro::Opcode opcode = GetOpcode();
127 pc += 4; 65 pc += 4;
128 66
129 // Update the program counter if we were delayed 67 // Update the program counter if we were delayed
@@ -134,18 +72,18 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
134 } 72 }
135 73
136 switch (opcode.operation) { 74 switch (opcode.operation) {
137 case Operation::ALU: { 75 case Macro::Operation::ALU: {
138 u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), 76 u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a),
139 GetRegister(opcode.src_b)); 77 GetRegister(opcode.src_b));
140 ProcessResult(opcode.result_operation, opcode.dst, result); 78 ProcessResult(opcode.result_operation, opcode.dst, result);
141 break; 79 break;
142 } 80 }
143 case Operation::AddImmediate: { 81 case Macro::Operation::AddImmediate: {
144 ProcessResult(opcode.result_operation, opcode.dst, 82 ProcessResult(opcode.result_operation, opcode.dst,
145 GetRegister(opcode.src_a) + opcode.immediate); 83 GetRegister(opcode.src_a) + opcode.immediate);
146 break; 84 break;
147 } 85 }
148 case Operation::ExtractInsert: { 86 case Macro::Operation::ExtractInsert: {
149 u32 dst = GetRegister(opcode.src_a); 87 u32 dst = GetRegister(opcode.src_a);
150 u32 src = GetRegister(opcode.src_b); 88 u32 src = GetRegister(opcode.src_b);
151 89
@@ -155,7 +93,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
155 ProcessResult(opcode.result_operation, opcode.dst, dst); 93 ProcessResult(opcode.result_operation, opcode.dst, dst);
156 break; 94 break;
157 } 95 }
158 case Operation::ExtractShiftLeftImmediate: { 96 case Macro::Operation::ExtractShiftLeftImmediate: {
159 u32 dst = GetRegister(opcode.src_a); 97 u32 dst = GetRegister(opcode.src_a);
160 u32 src = GetRegister(opcode.src_b); 98 u32 src = GetRegister(opcode.src_b);
161 99
@@ -164,7 +102,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
164 ProcessResult(opcode.result_operation, opcode.dst, result); 102 ProcessResult(opcode.result_operation, opcode.dst, result);
165 break; 103 break;
166 } 104 }
167 case Operation::ExtractShiftLeftRegister: { 105 case Macro::Operation::ExtractShiftLeftRegister: {
168 u32 dst = GetRegister(opcode.src_a); 106 u32 dst = GetRegister(opcode.src_a);
169 u32 src = GetRegister(opcode.src_b); 107 u32 src = GetRegister(opcode.src_b);
170 108
@@ -173,12 +111,12 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
173 ProcessResult(opcode.result_operation, opcode.dst, result); 111 ProcessResult(opcode.result_operation, opcode.dst, result);
174 break; 112 break;
175 } 113 }
176 case Operation::Read: { 114 case Macro::Operation::Read: {
177 u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate); 115 u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate);
178 ProcessResult(opcode.result_operation, opcode.dst, result); 116 ProcessResult(opcode.result_operation, opcode.dst, result);
179 break; 117 break;
180 } 118 }
181 case Operation::Branch: { 119 case Macro::Operation::Branch: {
182 ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); 120 ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
183 u32 value = GetRegister(opcode.src_a); 121 u32 value = GetRegister(opcode.src_a);
184 bool taken = EvaluateBranchCondition(opcode.branch_condition, value); 122 bool taken = EvaluateBranchCondition(opcode.branch_condition, value);
@@ -191,7 +129,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
191 129
192 delayed_pc = base_address + opcode.GetBranchTarget(); 130 delayed_pc = base_address + opcode.GetBranchTarget();
193 // Execute one more instruction due to the delay slot. 131 // Execute one more instruction due to the delay slot.
194 return Step(offset, true); 132 return Step(true);
195 } 133 }
196 break; 134 break;
197 } 135 }
@@ -204,51 +142,44 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
204 // cause an exit if it's executed inside a delay slot. 142 // cause an exit if it's executed inside a delay slot.
205 if (opcode.is_exit && !is_delay_slot) { 143 if (opcode.is_exit && !is_delay_slot) {
206 // Exit has a delay slot, execute the next instruction 144 // Exit has a delay slot, execute the next instruction
207 Step(offset, true); 145 Step(true);
208 return false; 146 return false;
209 } 147 }
210 148
211 return true; 149 return true;
212} 150}
213 151
214MacroInterpreter::Opcode MacroInterpreter::GetOpcode(u32 offset) const { 152u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b) {
215 const auto& macro_memory{maxwell3d.GetMacroMemory()};
216 ASSERT((pc % sizeof(u32)) == 0);
217 ASSERT((pc + offset) < macro_memory.size() * sizeof(u32));
218 return {macro_memory[offset + pc / sizeof(u32)]};
219}
220
221u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) {
222 switch (operation) { 153 switch (operation) {
223 case ALUOperation::Add: { 154 case Macro::ALUOperation::Add: {
224 const u64 result{static_cast<u64>(src_a) + src_b}; 155 const u64 result{static_cast<u64>(src_a) + src_b};
225 carry_flag = result > 0xffffffff; 156 carry_flag = result > 0xffffffff;
226 return static_cast<u32>(result); 157 return static_cast<u32>(result);
227 } 158 }
228 case ALUOperation::AddWithCarry: { 159 case Macro::ALUOperation::AddWithCarry: {
229 const u64 result{static_cast<u64>(src_a) + src_b + (carry_flag ? 1ULL : 0ULL)}; 160 const u64 result{static_cast<u64>(src_a) + src_b + (carry_flag ? 1ULL : 0ULL)};
230 carry_flag = result > 0xffffffff; 161 carry_flag = result > 0xffffffff;
231 return static_cast<u32>(result); 162 return static_cast<u32>(result);
232 } 163 }
233 case ALUOperation::Subtract: { 164 case Macro::ALUOperation::Subtract: {
234 const u64 result{static_cast<u64>(src_a) - src_b}; 165 const u64 result{static_cast<u64>(src_a) - src_b};
235 carry_flag = result < 0x100000000; 166 carry_flag = result < 0x100000000;
236 return static_cast<u32>(result); 167 return static_cast<u32>(result);
237 } 168 }
238 case ALUOperation::SubtractWithBorrow: { 169 case Macro::ALUOperation::SubtractWithBorrow: {
239 const u64 result{static_cast<u64>(src_a) - src_b - (carry_flag ? 0ULL : 1ULL)}; 170 const u64 result{static_cast<u64>(src_a) - src_b - (carry_flag ? 0ULL : 1ULL)};
240 carry_flag = result < 0x100000000; 171 carry_flag = result < 0x100000000;
241 return static_cast<u32>(result); 172 return static_cast<u32>(result);
242 } 173 }
243 case ALUOperation::Xor: 174 case Macro::ALUOperation::Xor:
244 return src_a ^ src_b; 175 return src_a ^ src_b;
245 case ALUOperation::Or: 176 case Macro::ALUOperation::Or:
246 return src_a | src_b; 177 return src_a | src_b;
247 case ALUOperation::And: 178 case Macro::ALUOperation::And:
248 return src_a & src_b; 179 return src_a & src_b;
249 case ALUOperation::AndNot: 180 case Macro::ALUOperation::AndNot:
250 return src_a & ~src_b; 181 return src_a & ~src_b;
251 case ALUOperation::Nand: 182 case Macro::ALUOperation::Nand:
252 return ~(src_a & src_b); 183 return ~(src_a & src_b);
253 184
254 default: 185 default:
@@ -257,43 +188,43 @@ u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b)
257 } 188 }
258} 189}
259 190
260void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 result) { 191void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result) {
261 switch (operation) { 192 switch (operation) {
262 case ResultOperation::IgnoreAndFetch: 193 case Macro::ResultOperation::IgnoreAndFetch:
263 // Fetch parameter and ignore result. 194 // Fetch parameter and ignore result.
264 SetRegister(reg, FetchParameter()); 195 SetRegister(reg, FetchParameter());
265 break; 196 break;
266 case ResultOperation::Move: 197 case Macro::ResultOperation::Move:
267 // Move result. 198 // Move result.
268 SetRegister(reg, result); 199 SetRegister(reg, result);
269 break; 200 break;
270 case ResultOperation::MoveAndSetMethod: 201 case Macro::ResultOperation::MoveAndSetMethod:
271 // Move result and use as Method Address. 202 // Move result and use as Method Address.
272 SetRegister(reg, result); 203 SetRegister(reg, result);
273 SetMethodAddress(result); 204 SetMethodAddress(result);
274 break; 205 break;
275 case ResultOperation::FetchAndSend: 206 case Macro::ResultOperation::FetchAndSend:
276 // Fetch parameter and send result. 207 // Fetch parameter and send result.
277 SetRegister(reg, FetchParameter()); 208 SetRegister(reg, FetchParameter());
278 Send(result); 209 Send(result);
279 break; 210 break;
280 case ResultOperation::MoveAndSend: 211 case Macro::ResultOperation::MoveAndSend:
281 // Move and send result. 212 // Move and send result.
282 SetRegister(reg, result); 213 SetRegister(reg, result);
283 Send(result); 214 Send(result);
284 break; 215 break;
285 case ResultOperation::FetchAndSetMethod: 216 case Macro::ResultOperation::FetchAndSetMethod:
286 // Fetch parameter and use result as Method Address. 217 // Fetch parameter and use result as Method Address.
287 SetRegister(reg, FetchParameter()); 218 SetRegister(reg, FetchParameter());
288 SetMethodAddress(result); 219 SetMethodAddress(result);
289 break; 220 break;
290 case ResultOperation::MoveAndSetMethodFetchAndSend: 221 case Macro::ResultOperation::MoveAndSetMethodFetchAndSend:
291 // Move result and use as Method Address, then fetch and send parameter. 222 // Move result and use as Method Address, then fetch and send parameter.
292 SetRegister(reg, result); 223 SetRegister(reg, result);
293 SetMethodAddress(result); 224 SetMethodAddress(result);
294 Send(FetchParameter()); 225 Send(FetchParameter());
295 break; 226 break;
296 case ResultOperation::MoveAndSetMethodSend: 227 case Macro::ResultOperation::MoveAndSetMethodSend:
297 // Move result and use as Method Address, then send bits 12:17 of result. 228 // Move result and use as Method Address, then send bits 12:17 of result.
298 SetRegister(reg, result); 229 SetRegister(reg, result);
299 SetMethodAddress(result); 230 SetMethodAddress(result);
@@ -304,16 +235,28 @@ void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 res
304 } 235 }
305} 236}
306 237
307u32 MacroInterpreter::FetchParameter() { 238bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const {
308 ASSERT(next_parameter_index < num_parameters); 239 switch (cond) {
309 return parameters[next_parameter_index++]; 240 case Macro::BranchCondition::Zero:
241 return value == 0;
242 case Macro::BranchCondition::NotZero:
243 return value != 0;
244 }
245 UNREACHABLE();
246 return true;
247}
248
249Macro::Opcode MacroInterpreterImpl::GetOpcode() const {
250 ASSERT((pc % sizeof(u32)) == 0);
251 ASSERT(pc < code.size() * sizeof(u32));
252 return {code[pc / sizeof(u32)]};
310} 253}
311 254
312u32 MacroInterpreter::GetRegister(u32 register_id) const { 255u32 MacroInterpreterImpl::GetRegister(u32 register_id) const {
313 return registers.at(register_id); 256 return registers.at(register_id);
314} 257}
315 258
316void MacroInterpreter::SetRegister(u32 register_id, u32 value) { 259void MacroInterpreterImpl::SetRegister(u32 register_id, u32 value) {
317 // Register 0 is hardwired as the zero register. 260 // Register 0 is hardwired as the zero register.
318 // Ensure no writes to it actually occur. 261 // Ensure no writes to it actually occur.
319 if (register_id == 0) { 262 if (register_id == 0) {
@@ -323,30 +266,24 @@ void MacroInterpreter::SetRegister(u32 register_id, u32 value) {
323 registers.at(register_id) = value; 266 registers.at(register_id) = value;
324} 267}
325 268
326void MacroInterpreter::SetMethodAddress(u32 address) { 269void MacroInterpreterImpl::SetMethodAddress(u32 address) {
327 method_address.raw = address; 270 method_address.raw = address;
328} 271}
329 272
330void MacroInterpreter::Send(u32 value) { 273void MacroInterpreterImpl::Send(u32 value) {
331 maxwell3d.CallMethodFromMME(method_address.address, value); 274 maxwell3d.CallMethodFromMME(method_address.address, value);
332 // Increment the method address by the method increment. 275 // Increment the method address by the method increment.
333 method_address.address.Assign(method_address.address.Value() + 276 method_address.address.Assign(method_address.address.Value() +
334 method_address.increment.Value()); 277 method_address.increment.Value());
335} 278}
336 279
337u32 MacroInterpreter::Read(u32 method) const { 280u32 MacroInterpreterImpl::Read(u32 method) const {
338 return maxwell3d.GetRegisterValue(method); 281 return maxwell3d.GetRegisterValue(method);
339} 282}
340 283
341bool MacroInterpreter::EvaluateBranchCondition(BranchCondition cond, u32 value) const { 284u32 MacroInterpreterImpl::FetchParameter() {
342 switch (cond) { 285 ASSERT(next_parameter_index < num_parameters);
343 case BranchCondition::Zero: 286 return parameters[next_parameter_index++];
344 return value == 0;
345 case BranchCondition::NotZero:
346 return value != 0;
347 }
348 UNREACHABLE();
349 return true;
350} 287}
351 288
352} // namespace Tegra 289} // namespace Tegra
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h
index 631146d89..90217fc89 100644
--- a/src/video_core/macro_interpreter.h
+++ b/src/video_core/macro/macro_interpreter.h
@@ -1,44 +1,37 @@
1// Copyright 2018 yuzu Emulator Project 1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#pragma once 5#pragma once
6
7#include <array> 6#include <array>
8#include <optional> 7#include <optional>
9 8#include <vector>
10#include "common/bit_field.h" 9#include "common/bit_field.h"
11#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/macro/macro.h"
12 12
13namespace Tegra { 13namespace Tegra {
14namespace Engines { 14namespace Engines {
15class Maxwell3D; 15class Maxwell3D;
16} 16}
17 17
18class MacroInterpreter final { 18class MacroInterpreter final : public MacroEngine {
19public: 19public:
20 explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d); 20 explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d);
21 21
22 /** 22protected:
23 * Executes the macro code with the specified input parameters. 23 std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override;
24 * @param offset Offset to start execution at.
25 * @param parameters The parameters of the macro.
26 */
27 void Execute(u32 offset, std::size_t num_parameters, const u32* parameters);
28 24
29private: 25private:
30 enum class ALUOperation : u32; 26 Engines::Maxwell3D& maxwell3d;
31 enum class BranchCondition : u32; 27};
32 enum class ResultOperation : u32;
33
34 union Opcode;
35 28
36 union MethodAddress { 29class MacroInterpreterImpl : public CachedMacro {
37 u32 raw; 30public:
38 BitField<0, 12, u32> address; 31 MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code);
39 BitField<12, 6, u32> increment; 32 void Execute(const std::vector<u32>& parameters, u32 method) override;
40 };
41 33
34private:
42 /// Resets the execution engine state, zeroing registers, etc. 35 /// Resets the execution engine state, zeroing registers, etc.
43 void Reset(); 36 void Reset();
44 37
@@ -49,20 +42,20 @@ private:
49 * @param is_delay_slot Whether the current step is being executed due to a delay slot in a 42 * @param is_delay_slot Whether the current step is being executed due to a delay slot in a
50 * previous instruction. 43 * previous instruction.
51 */ 44 */
52 bool Step(u32 offset, bool is_delay_slot); 45 bool Step(bool is_delay_slot);
53 46
54 /// Calculates the result of an ALU operation. src_a OP src_b; 47 /// Calculates the result of an ALU operation. src_a OP src_b;
55 u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b); 48 u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b);
56 49
57 /// Performs the result operation on the input result and stores it in the specified register 50 /// Performs the result operation on the input result and stores it in the specified register
58 /// (if necessary). 51 /// (if necessary).
59 void ProcessResult(ResultOperation operation, u32 reg, u32 result); 52 void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result);
60 53
61 /// Evaluates the branch condition and returns whether the branch should be taken or not. 54 /// Evaluates the branch condition and returns whether the branch should be taken or not.
62 bool EvaluateBranchCondition(BranchCondition cond, u32 value) const; 55 bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const;
63 56
64 /// Reads an opcode at the current program counter location. 57 /// Reads an opcode at the current program counter location.
65 Opcode GetOpcode(u32 offset) const; 58 Macro::Opcode GetOpcode() const;
66 59
67 /// Returns the specified register's value. Register 0 is hardcoded to always return 0. 60 /// Returns the specified register's value. Register 0 is hardcoded to always return 0.
68 u32 GetRegister(u32 register_id) const; 61 u32 GetRegister(u32 register_id) const;
@@ -89,13 +82,11 @@ private:
89 /// Program counter to execute at after the delay slot is executed. 82 /// Program counter to execute at after the delay slot is executed.
90 std::optional<u32> delayed_pc; 83 std::optional<u32> delayed_pc;
91 84
92 static constexpr std::size_t NumMacroRegisters = 8;
93
94 /// General purpose macro registers. 85 /// General purpose macro registers.
95 std::array<u32, NumMacroRegisters> registers = {}; 86 std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {};
96 87
97 /// Method address to use for the next Send instruction. 88 /// Method address to use for the next Send instruction.
98 MethodAddress method_address = {}; 89 Macro::MethodAddress method_address = {};
99 90
100 /// Input parameters of the current macro. 91 /// Input parameters of the current macro.
101 std::unique_ptr<u32[]> parameters; 92 std::unique_ptr<u32[]> parameters;
@@ -105,5 +96,7 @@ private:
105 u32 next_parameter_index = 0; 96 u32 next_parameter_index = 0;
106 97
107 bool carry_flag = false; 98 bool carry_flag = false;
99 const std::vector<u32>& code;
108}; 100};
101
109} // namespace Tegra 102} // namespace Tegra
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
new file mode 100644
index 000000000..07292702f
--- /dev/null
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -0,0 +1,621 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/logging/log.h"
7#include "common/microprofile.h"
8#include "common/x64/xbyak_util.h"
9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/macro/macro_interpreter.h"
11#include "video_core/macro/macro_jit_x64.h"
12
13MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255, 47));
14MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));
15
16namespace Tegra {
17static const Xbyak::Reg64 STATE = Xbyak::util::rbx;
18static const Xbyak::Reg32 RESULT = Xbyak::util::ebp;
19static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
20static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
21static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
22
23static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
24 STATE,
25 RESULT,
26 PARAMETERS,
27 METHOD_ADDRESS,
28 BRANCH_HOLDER,
29});
30
31MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d)
32 : MacroEngine::MacroEngine(maxwell3d), maxwell3d(maxwell3d) {}
33
34std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) {
35 return std::make_unique<MacroJITx64Impl>(maxwell3d, code);
36}
37
38MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code)
39 : Xbyak::CodeGenerator(MAX_CODE_SIZE), code(code), maxwell3d(maxwell3d) {
40 Compile();
41}
42
43MacroJITx64Impl::~MacroJITx64Impl() = default;
44
45void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) {
46 MICROPROFILE_SCOPE(MacroJitExecute);
47 ASSERT_OR_EXECUTE(program != nullptr, { return; });
48 JITState state{};
49 state.maxwell3d = &maxwell3d;
50 state.registers = {};
51 program(&state, parameters.data());
52}
53
54void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) {
55 const bool is_a_zero = opcode.src_a == 0;
56 const bool is_b_zero = opcode.src_b == 0;
57 const bool valid_operation = !is_a_zero && !is_b_zero;
58 [[maybe_unused]] const bool is_move_operation = !is_a_zero && is_b_zero;
59 const bool has_zero_register = is_a_zero || is_b_zero;
60 const bool no_zero_reg_skip = opcode.alu_operation == Macro::ALUOperation::AddWithCarry ||
61 opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow;
62
63 Xbyak::Reg32 src_a;
64 Xbyak::Reg32 src_b;
65
66 if (!optimizer.zero_reg_skip || no_zero_reg_skip) {
67 src_a = Compile_GetRegister(opcode.src_a, RESULT);
68 src_b = Compile_GetRegister(opcode.src_b, eax);
69 } else {
70 if (!is_a_zero) {
71 src_a = Compile_GetRegister(opcode.src_a, RESULT);
72 }
73 if (!is_b_zero) {
74 src_b = Compile_GetRegister(opcode.src_b, eax);
75 }
76 }
77
78 bool has_emitted = false;
79
80 switch (opcode.alu_operation) {
81 case Macro::ALUOperation::Add:
82 if (optimizer.zero_reg_skip) {
83 if (valid_operation) {
84 add(src_a, src_b);
85 }
86 } else {
87 add(src_a, src_b);
88 }
89
90 if (!optimizer.can_skip_carry) {
91 setc(byte[STATE + offsetof(JITState, carry_flag)]);
92 }
93 break;
94 case Macro::ALUOperation::AddWithCarry:
95 bt(dword[STATE + offsetof(JITState, carry_flag)], 0);
96 adc(src_a, src_b);
97 setc(byte[STATE + offsetof(JITState, carry_flag)]);
98 break;
99 case Macro::ALUOperation::Subtract:
100 if (optimizer.zero_reg_skip) {
101 if (valid_operation) {
102 sub(src_a, src_b);
103 has_emitted = true;
104 }
105 } else {
106 sub(src_a, src_b);
107 has_emitted = true;
108 }
109 if (!optimizer.can_skip_carry && has_emitted) {
110 setc(byte[STATE + offsetof(JITState, carry_flag)]);
111 }
112 break;
113 case Macro::ALUOperation::SubtractWithBorrow:
114 bt(dword[STATE + offsetof(JITState, carry_flag)], 0);
115 sbb(src_a, src_b);
116 setc(byte[STATE + offsetof(JITState, carry_flag)]);
117 break;
118 case Macro::ALUOperation::Xor:
119 if (optimizer.zero_reg_skip) {
120 if (valid_operation) {
121 xor_(src_a, src_b);
122 }
123 } else {
124 xor_(src_a, src_b);
125 }
126 break;
127 case Macro::ALUOperation::Or:
128 if (optimizer.zero_reg_skip) {
129 if (valid_operation) {
130 or_(src_a, src_b);
131 }
132 } else {
133 or_(src_a, src_b);
134 }
135 break;
136 case Macro::ALUOperation::And:
137 if (optimizer.zero_reg_skip) {
138 if (!has_zero_register) {
139 and_(src_a, src_b);
140 }
141 } else {
142 and_(src_a, src_b);
143 }
144 break;
145 case Macro::ALUOperation::AndNot:
146 if (optimizer.zero_reg_skip) {
147 if (!is_a_zero) {
148 not_(src_b);
149 and_(src_a, src_b);
150 }
151 } else {
152 not_(src_b);
153 and_(src_a, src_b);
154 }
155 break;
156 case Macro::ALUOperation::Nand:
157 if (optimizer.zero_reg_skip) {
158 if (!is_a_zero) {
159 and_(src_a, src_b);
160 not_(src_a);
161 }
162 } else {
163 and_(src_a, src_b);
164 not_(src_a);
165 }
166 break;
167 default:
168 UNIMPLEMENTED_MSG("Unimplemented ALU operation {}",
169 static_cast<std::size_t>(opcode.alu_operation.Value()));
170 break;
171 }
172 Compile_ProcessResult(opcode.result_operation, opcode.dst);
173}
174
175void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) {
176 if (optimizer.skip_dummy_addimmediate) {
177 // Games tend to use this as an exit instruction placeholder. It's to encode an instruction
178 // without doing anything. In our case we can just not emit anything.
179 if (opcode.result_operation == Macro::ResultOperation::Move && opcode.dst == 0) {
180 return;
181 }
182 }
183 // Check for redundant moves
184 if (optimizer.optimize_for_method_move &&
185 opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) {
186 if (next_opcode.has_value()) {
187 const auto next = *next_opcode;
188 if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod &&
189 opcode.dst == next.dst) {
190 return;
191 }
192 }
193 }
194 if (optimizer.zero_reg_skip && opcode.src_a == 0) {
195 if (opcode.immediate == 0) {
196 xor_(RESULT, RESULT);
197 } else {
198 mov(RESULT, opcode.immediate);
199 }
200 } else {
201 auto result = Compile_GetRegister(opcode.src_a, RESULT);
202 if (opcode.immediate > 2) {
203 add(result, opcode.immediate);
204 } else if (opcode.immediate == 1) {
205 inc(result);
206 } else if (opcode.immediate < 0) {
207 sub(result, opcode.immediate * -1);
208 }
209 }
210 Compile_ProcessResult(opcode.result_operation, opcode.dst);
211}
212
213void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) {
214 auto dst = Compile_GetRegister(opcode.src_a, RESULT);
215 auto src = Compile_GetRegister(opcode.src_b, eax);
216
217 if (opcode.bf_src_bit != 0 && opcode.bf_src_bit != 31) {
218 shr(src, opcode.bf_src_bit);
219 } else if (opcode.bf_src_bit == 31) {
220 xor_(src, src);
221 }
222 // Don't bother masking the whole register since we're using a 32 bit register
223 if (opcode.bf_size != 31 && opcode.bf_size != 0) {
224 and_(src, opcode.GetBitfieldMask());
225 } else if (opcode.bf_size == 0) {
226 xor_(src, src);
227 }
228 if (opcode.bf_dst_bit != 31 && opcode.bf_dst_bit != 0) {
229 shl(src, opcode.bf_dst_bit);
230 } else if (opcode.bf_dst_bit == 31) {
231 xor_(src, src);
232 }
233
234 const u32 mask = ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit);
235 if (mask != 0xffffffff) {
236 and_(dst, mask);
237 }
238 or_(dst, src);
239 Compile_ProcessResult(opcode.result_operation, opcode.dst);
240}
241
242void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) {
243 const auto dst = Compile_GetRegister(opcode.src_a, ecx);
244 const auto src = Compile_GetRegister(opcode.src_b, RESULT);
245
246 shr(src, dst.cvt8());
247 if (opcode.bf_size != 0 && opcode.bf_size != 31) {
248 and_(src, opcode.GetBitfieldMask());
249 } else if (opcode.bf_size == 0) {
250 xor_(src, src);
251 }
252
253 if (opcode.bf_dst_bit != 0 && opcode.bf_dst_bit != 31) {
254 shl(src, opcode.bf_dst_bit);
255 } else if (opcode.bf_dst_bit == 31) {
256 xor_(src, src);
257 }
258 Compile_ProcessResult(opcode.result_operation, opcode.dst);
259}
260
261void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) {
262 const auto dst = Compile_GetRegister(opcode.src_a, ecx);
263 const auto src = Compile_GetRegister(opcode.src_b, RESULT);
264
265 if (opcode.bf_src_bit != 0) {
266 shr(src, opcode.bf_src_bit);
267 }
268
269 if (opcode.bf_size != 31) {
270 and_(src, opcode.GetBitfieldMask());
271 }
272 shl(src, dst.cvt8());
273
274 Compile_ProcessResult(opcode.result_operation, opcode.dst);
275}
276
277void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) {
278 if (optimizer.zero_reg_skip && opcode.src_a == 0) {
279 if (opcode.immediate == 0) {
280 xor_(RESULT, RESULT);
281 } else {
282 mov(RESULT, opcode.immediate);
283 }
284 } else {
285 auto result = Compile_GetRegister(opcode.src_a, RESULT);
286 if (opcode.immediate > 2) {
287 add(result, opcode.immediate);
288 } else if (opcode.immediate == 1) {
289 inc(result);
290 } else if (opcode.immediate < 0) {
291 sub(result, opcode.immediate * -1);
292 }
293 }
294
295 // Equivalent to Engines::Maxwell3D::GetRegisterValue:
296 if (optimizer.enable_asserts) {
297 Xbyak::Label pass_range_check;
298 cmp(RESULT, static_cast<u32>(Engines::Maxwell3D::Regs::NUM_REGS));
299 jb(pass_range_check);
300 int3();
301 L(pass_range_check);
302 }
303 mov(rax, qword[STATE]);
304 mov(RESULT,
305 dword[rax + offsetof(Engines::Maxwell3D, regs) +
306 offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]);
307
308 Compile_ProcessResult(opcode.result_operation, opcode.dst);
309}
310
311static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) {
312 maxwell3d->CallMethodFromMME(method_address.address, value);
313}
314
315void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) {
316 Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
317 mov(Common::X64::ABI_PARAM1, qword[STATE]);
318 mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS);
319 mov(Common::X64::ABI_PARAM3, value);
320 Common::X64::CallFarFunction(*this, &Send);
321 Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
322
323 Xbyak::Label dont_process{};
324 // Get increment
325 test(METHOD_ADDRESS, 0x3f000);
326 // If zero, method address doesn't update
327 je(dont_process);
328
329 mov(ecx, METHOD_ADDRESS);
330 and_(METHOD_ADDRESS, 0xfff);
331 shr(ecx, 12);
332 and_(ecx, 0x3f);
333 lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]);
334 sal(ecx, 12);
335 or_(eax, ecx);
336
337 mov(METHOD_ADDRESS, eax);
338
339 L(dont_process);
340}
341
342void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) {
343 ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid");
344 const s32 jump_address =
345 static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32));
346
347 Xbyak::Label end;
348 auto value = Compile_GetRegister(opcode.src_a, eax);
349 test(value, value);
350 if (optimizer.has_delayed_pc) {
351 switch (opcode.branch_condition) {
352 case Macro::BranchCondition::Zero:
353 jne(end, T_NEAR);
354 break;
355 case Macro::BranchCondition::NotZero:
356 je(end, T_NEAR);
357 break;
358 }
359
360 if (opcode.branch_annul) {
361 xor_(BRANCH_HOLDER, BRANCH_HOLDER);
362 jmp(labels[jump_address], T_NEAR);
363 } else {
364 Xbyak::Label handle_post_exit{};
365 Xbyak::Label skip{};
366 jmp(skip, T_NEAR);
367 if (opcode.is_exit) {
368 L(handle_post_exit);
369 // Execute 1 instruction
370 mov(BRANCH_HOLDER, end_of_code);
371 // Jump to next instruction to skip delay slot check
372 jmp(labels[jump_address], T_NEAR);
373 } else {
374 L(handle_post_exit);
375 xor_(BRANCH_HOLDER, BRANCH_HOLDER);
376 jmp(labels[jump_address], T_NEAR);
377 }
378 L(skip);
379 mov(BRANCH_HOLDER, handle_post_exit);
380 jmp(delay_skip[pc], T_NEAR);
381 }
382 } else {
383 switch (opcode.branch_condition) {
384 case Macro::BranchCondition::Zero:
385 je(labels[jump_address], T_NEAR);
386 break;
387 case Macro::BranchCondition::NotZero:
388 jne(labels[jump_address], T_NEAR);
389 break;
390 }
391 }
392
393 L(end);
394}
395
396void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() {
397 optimizer.can_skip_carry = true;
398 optimizer.has_delayed_pc = false;
399 for (auto raw_op : code) {
400 Macro::Opcode op{};
401 op.raw = raw_op;
402
403 if (op.operation == Macro::Operation::ALU) {
404 // Scan for any ALU operations which actually use the carry flag, if they don't exist in
405 // our current code we can skip emitting the carry flag handling operations
406 if (op.alu_operation == Macro::ALUOperation::AddWithCarry ||
407 op.alu_operation == Macro::ALUOperation::SubtractWithBorrow) {
408 optimizer.can_skip_carry = false;
409 }
410 }
411
412 if (op.operation == Macro::Operation::Branch) {
413 if (!op.branch_annul) {
414 optimizer.has_delayed_pc = true;
415 }
416 }
417 }
418}
419
420void MacroJITx64Impl::Compile() {
421 MICROPROFILE_SCOPE(MacroJitCompile);
422 bool keep_executing = true;
423 labels.fill(Xbyak::Label());
424
425 Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
426 // JIT state
427 mov(STATE, Common::X64::ABI_PARAM1);
428 mov(PARAMETERS, Common::X64::ABI_PARAM2);
429 xor_(RESULT, RESULT);
430 xor_(METHOD_ADDRESS, METHOD_ADDRESS);
431 xor_(BRANCH_HOLDER, BRANCH_HOLDER);
432
433 mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter());
434
435 // Track get register for zero registers and mark it as no-op
436 optimizer.zero_reg_skip = true;
437
438 // AddImmediate tends to be used as a NOP instruction, if we detect this we can
439 // completely skip the entire code path and no emit anything
440 optimizer.skip_dummy_addimmediate = true;
441
442 // SMO tends to emit a lot of unnecessary method moves, we can mitigate this by only emitting
443 // one if our register isn't "dirty"
444 optimizer.optimize_for_method_move = true;
445
446 // Enable run-time assertions in JITted code
447 optimizer.enable_asserts = false;
448
449 // Check to see if we can skip emitting certain instructions
450 Optimizer_ScanFlags();
451
452 const u32 op_count = static_cast<u32>(code.size());
453 for (u32 i = 0; i < op_count; i++) {
454 if (i < op_count - 1) {
455 pc = i + 1;
456 next_opcode = GetOpCode();
457 } else {
458 next_opcode = {};
459 }
460 pc = i;
461 Compile_NextInstruction();
462 }
463
464 L(end_of_code);
465
466 Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8);
467 ret();
468 ready();
469 program = getCode<ProgramType>();
470}
471
472bool MacroJITx64Impl::Compile_NextInstruction() {
473 const auto opcode = GetOpCode();
474 if (labels[pc].getAddress()) {
475 return false;
476 }
477
478 L(labels[pc]);
479
480 switch (opcode.operation) {
481 case Macro::Operation::ALU:
482 Compile_ALU(opcode);
483 break;
484 case Macro::Operation::AddImmediate:
485 Compile_AddImmediate(opcode);
486 break;
487 case Macro::Operation::ExtractInsert:
488 Compile_ExtractInsert(opcode);
489 break;
490 case Macro::Operation::ExtractShiftLeftImmediate:
491 Compile_ExtractShiftLeftImmediate(opcode);
492 break;
493 case Macro::Operation::ExtractShiftLeftRegister:
494 Compile_ExtractShiftLeftRegister(opcode);
495 break;
496 case Macro::Operation::Read:
497 Compile_Read(opcode);
498 break;
499 case Macro::Operation::Branch:
500 Compile_Branch(opcode);
501 break;
502 default:
503 UNIMPLEMENTED_MSG("Unimplemented opcode {}", opcode.operation.Value());
504 break;
505 }
506
507 if (optimizer.has_delayed_pc) {
508 if (opcode.is_exit) {
509 mov(rax, end_of_code);
510 test(BRANCH_HOLDER, BRANCH_HOLDER);
511 cmove(BRANCH_HOLDER, rax);
512 // Jump to next instruction to skip delay slot check
513 je(labels[pc + 1], T_NEAR);
514 } else {
515 // TODO(ogniK): Optimize delay slot branching
516 Xbyak::Label no_delay_slot{};
517 test(BRANCH_HOLDER, BRANCH_HOLDER);
518 je(no_delay_slot, T_NEAR);
519 mov(rax, BRANCH_HOLDER);
520 xor_(BRANCH_HOLDER, BRANCH_HOLDER);
521 jmp(rax);
522 L(no_delay_slot);
523 }
524 L(delay_skip[pc]);
525 if (opcode.is_exit) {
526 return false;
527 }
528 } else {
529 test(BRANCH_HOLDER, BRANCH_HOLDER);
530 jne(end_of_code, T_NEAR);
531 if (opcode.is_exit) {
532 inc(BRANCH_HOLDER);
533 return false;
534 }
535 }
536 return true;
537}
538
539Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() {
540 mov(eax, dword[PARAMETERS]);
541 add(PARAMETERS, sizeof(u32));
542 return eax;
543}
544
545Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) {
546 if (index == 0) {
547 // Register 0 is always zero
548 xor_(dst, dst);
549 } else {
550 mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]);
551 }
552
553 return dst;
554}
555
556void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) {
557 const auto SetRegister = [this](u32 reg, const Xbyak::Reg32& result) {
558 // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero
559 // register.
560 if (reg == 0) {
561 return;
562 }
563 mov(dword[STATE + offsetof(JITState, registers) + reg * sizeof(u32)], result);
564 };
565 const auto SetMethodAddress = [this](const Xbyak::Reg32& reg) { mov(METHOD_ADDRESS, reg); };
566
567 switch (operation) {
568 case Macro::ResultOperation::IgnoreAndFetch:
569 SetRegister(reg, Compile_FetchParameter());
570 break;
571 case Macro::ResultOperation::Move:
572 SetRegister(reg, RESULT);
573 break;
574 case Macro::ResultOperation::MoveAndSetMethod:
575 SetRegister(reg, RESULT);
576 SetMethodAddress(RESULT);
577 break;
578 case Macro::ResultOperation::FetchAndSend:
579 // Fetch parameter and send result.
580 SetRegister(reg, Compile_FetchParameter());
581 Compile_Send(RESULT);
582 break;
583 case Macro::ResultOperation::MoveAndSend:
584 // Move and send result.
585 SetRegister(reg, RESULT);
586 Compile_Send(RESULT);
587 break;
588 case Macro::ResultOperation::FetchAndSetMethod:
589 // Fetch parameter and use result as Method Address.
590 SetRegister(reg, Compile_FetchParameter());
591 SetMethodAddress(RESULT);
592 break;
593 case Macro::ResultOperation::MoveAndSetMethodFetchAndSend:
594 // Move result and use as Method Address, then fetch and send parameter.
595 SetRegister(reg, RESULT);
596 SetMethodAddress(RESULT);
597 Compile_Send(Compile_FetchParameter());
598 break;
599 case Macro::ResultOperation::MoveAndSetMethodSend:
600 // Move result and use as Method Address, then send bits 12:17 of result.
601 SetRegister(reg, RESULT);
602 SetMethodAddress(RESULT);
603 shr(RESULT, 12);
604 and_(RESULT, 0b111111);
605 Compile_Send(RESULT);
606 break;
607 default:
608 UNIMPLEMENTED_MSG("Unimplemented macro operation {}", static_cast<std::size_t>(operation));
609 }
610}
611
612Macro::Opcode MacroJITx64Impl::GetOpCode() const {
613 ASSERT(pc < code.size());
614 return {code[pc]};
615}
616
617std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const {
618 return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED;
619}
620
621} // namespace Tegra
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h
new file mode 100644
index 000000000..a180e7428
--- /dev/null
+++ b/src/video_core/macro/macro_jit_x64.h
@@ -0,0 +1,98 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <bitset>
9#include <xbyak.h>
10#include "common/bit_field.h"
11#include "common/common_types.h"
12#include "common/x64/xbyak_abi.h"
13#include "video_core/macro/macro.h"
14
15namespace Tegra {
16
17namespace Engines {
18class Maxwell3D;
19}
20
21/// MAX_CODE_SIZE is arbitrarily chosen based on current booting games
22constexpr size_t MAX_CODE_SIZE = 0x10000;
23
24class MacroJITx64 final : public MacroEngine {
25public:
26 explicit MacroJITx64(Engines::Maxwell3D& maxwell3d);
27
28protected:
29 std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override;
30
31private:
32 Engines::Maxwell3D& maxwell3d;
33};
34
35class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro {
36public:
37 MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code);
38 ~MacroJITx64Impl();
39
40 void Execute(const std::vector<u32>& parameters, u32 method) override;
41
42 void Compile_ALU(Macro::Opcode opcode);
43 void Compile_AddImmediate(Macro::Opcode opcode);
44 void Compile_ExtractInsert(Macro::Opcode opcode);
45 void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode);
46 void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode);
47 void Compile_Read(Macro::Opcode opcode);
48 void Compile_Branch(Macro::Opcode opcode);
49
50private:
51 void Optimizer_ScanFlags();
52
53 void Compile();
54 bool Compile_NextInstruction();
55
56 Xbyak::Reg32 Compile_FetchParameter();
57 Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst);
58
59 void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg);
60 void Compile_Send(Xbyak::Reg32 value);
61
62 Macro::Opcode GetOpCode() const;
63 std::bitset<32> PersistentCallerSavedRegs() const;
64
65 struct JITState {
66 Engines::Maxwell3D* maxwell3d{};
67 std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{};
68 u32 carry_flag{};
69 };
70 static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0");
71 using ProgramType = void (*)(JITState*, const u32*);
72
73 struct OptimizerState {
74 bool can_skip_carry{};
75 bool has_delayed_pc{};
76 bool zero_reg_skip{};
77 bool skip_dummy_addimmediate{};
78 bool optimize_for_method_move{};
79 bool enable_asserts{};
80 };
81 OptimizerState optimizer{};
82
83 std::optional<Macro::Opcode> next_opcode{};
84 ProgramType program{nullptr};
85
86 std::array<Xbyak::Label, MAX_CODE_SIZE> labels;
87 std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip;
88 Xbyak::Label end_of_code{};
89
90 bool is_delay_slot{};
91 u32 pc{};
92 std::optional<u32> delayed_pc;
93
94 const std::vector<u32>& code;
95 Engines::Maxwell3D& maxwell3d;
96};
97
98} // namespace Tegra
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index dbee9f634..ff5505d12 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -210,10 +210,11 @@ bool MemoryManager::IsBlockContinuous(const GPUVAddr start, const std::size_t si
210 return range == inner_size; 210 return range == inner_size;
211} 211}
212 212
213void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::size_t size) const { 213void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer,
214 const std::size_t size) const {
214 std::size_t remaining_size{size}; 215 std::size_t remaining_size{size};
215 std::size_t page_index{src_addr >> page_bits}; 216 std::size_t page_index{gpu_src_addr >> page_bits};
216 std::size_t page_offset{src_addr & page_mask}; 217 std::size_t page_offset{gpu_src_addr & page_mask};
217 218
218 auto& memory = system.Memory(); 219 auto& memory = system.Memory();
219 220
@@ -234,11 +235,11 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
234 } 235 }
235} 236}
236 237
237void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, 238void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
238 const std::size_t size) const { 239 const std::size_t size) const {
239 std::size_t remaining_size{size}; 240 std::size_t remaining_size{size};
240 std::size_t page_index{src_addr >> page_bits}; 241 std::size_t page_index{gpu_src_addr >> page_bits};
241 std::size_t page_offset{src_addr & page_mask}; 242 std::size_t page_offset{gpu_src_addr & page_mask};
242 243
243 auto& memory = system.Memory(); 244 auto& memory = system.Memory();
244 245
@@ -259,10 +260,11 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
259 } 260 }
260} 261}
261 262
262void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t size) { 263void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer,
264 const std::size_t size) {
263 std::size_t remaining_size{size}; 265 std::size_t remaining_size{size};
264 std::size_t page_index{dest_addr >> page_bits}; 266 std::size_t page_index{gpu_dest_addr >> page_bits};
265 std::size_t page_offset{dest_addr & page_mask}; 267 std::size_t page_offset{gpu_dest_addr & page_mask};
266 268
267 auto& memory = system.Memory(); 269 auto& memory = system.Memory();
268 270
@@ -283,11 +285,11 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
283 } 285 }
284} 286}
285 287
286void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, 288void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer,
287 const std::size_t size) { 289 const std::size_t size) {
288 std::size_t remaining_size{size}; 290 std::size_t remaining_size{size};
289 std::size_t page_index{dest_addr >> page_bits}; 291 std::size_t page_index{gpu_dest_addr >> page_bits};
290 std::size_t page_offset{dest_addr & page_mask}; 292 std::size_t page_offset{gpu_dest_addr & page_mask};
291 293
292 auto& memory = system.Memory(); 294 auto& memory = system.Memory();
293 295
@@ -306,16 +308,18 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
306 } 308 }
307} 309}
308 310
309void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { 311void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
312 const std::size_t size) {
310 std::vector<u8> tmp_buffer(size); 313 std::vector<u8> tmp_buffer(size);
311 ReadBlock(src_addr, tmp_buffer.data(), size); 314 ReadBlock(gpu_src_addr, tmp_buffer.data(), size);
312 WriteBlock(dest_addr, tmp_buffer.data(), size); 315 WriteBlock(gpu_dest_addr, tmp_buffer.data(), size);
313} 316}
314 317
315void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { 318void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr,
319 const std::size_t size) {
316 std::vector<u8> tmp_buffer(size); 320 std::vector<u8> tmp_buffer(size);
317 ReadBlockUnsafe(src_addr, tmp_buffer.data(), size); 321 ReadBlockUnsafe(gpu_src_addr, tmp_buffer.data(), size);
318 WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); 322 WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size);
319} 323}
320 324
321bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { 325bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 0ddd52d5a..87658e87a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -79,9 +79,9 @@ public:
79 * in the Host Memory counterpart. Note: This functions cause Host GPU Memory 79 * in the Host Memory counterpart. Note: This functions cause Host GPU Memory
80 * Flushes and Invalidations, respectively to each operation. 80 * Flushes and Invalidations, respectively to each operation.
81 */ 81 */
82 void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const; 82 void ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
83 void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); 83 void WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
84 void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); 84 void CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
85 85
86 /** 86 /**
87 * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and 87 * ReadBlockUnsafe and WriteBlockUnsafe are special versions of ReadBlock and
@@ -93,9 +93,9 @@ public:
93 * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture 93 * WriteBlockUnsafe instead of WriteBlock since it shouldn't invalidate the texture
94 * being flushed. 94 * being flushed.
95 */ 95 */
96 void ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const; 96 void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
97 void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); 97 void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
98 void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); 98 void CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size);
99 99
100 /** 100 /**
101 * IsGranularRange checks if a gpu region can be simply read with a pointer 101 * IsGranularRange checks if a gpu region can be simply read with a pointer
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 2f75f8801..e12dab899 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -220,8 +220,8 @@ private:
220 return cache_begin < addr_end && addr_begin < cache_end; 220 return cache_begin < addr_end && addr_begin < cache_end;
221 }; 221 };
222 222
223 const u64 page_end = addr_end >> PAGE_SHIFT; 223 const u64 page_end = addr_end >> PAGE_BITS;
224 for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) { 224 for (u64 page = addr_begin >> PAGE_BITS; page <= page_end; ++page) {
225 const auto& it = cached_queries.find(page); 225 const auto& it = cached_queries.find(page);
226 if (it == std::end(cached_queries)) { 226 if (it == std::end(cached_queries)) {
227 continue; 227 continue;
@@ -242,14 +242,14 @@ private:
242 /// Registers the passed parameters as cached and returns a pointer to the stored cached query. 242 /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
243 CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { 243 CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
244 rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); 244 rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
245 const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT; 245 const u64 page = static_cast<u64>(cpu_addr) >> PAGE_BITS;
246 return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, 246 return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
247 host_ptr); 247 host_ptr);
248 } 248 }
249 249
250 /// Tries to a get a cached query. Returns nullptr on failure. 250 /// Tries to a get a cached query. Returns nullptr on failure.
251 CachedQuery* TryGet(VAddr addr) { 251 CachedQuery* TryGet(VAddr addr) {
252 const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; 252 const u64 page = static_cast<u64>(addr) >> PAGE_BITS;
253 const auto it = cached_queries.find(page); 253 const auto it = cached_queries.find(page);
254 if (it == std::end(cached_queries)) { 254 if (it == std::end(cached_queries)) {
255 return nullptr; 255 return nullptr;
@@ -268,7 +268,7 @@ private:
268 } 268 }
269 269
270 static constexpr std::uintptr_t PAGE_SIZE = 4096; 270 static constexpr std::uintptr_t PAGE_SIZE = 4096;
271 static constexpr unsigned PAGE_SHIFT = 12; 271 static constexpr unsigned PAGE_BITS = 12;
272 272
273 Core::System& system; 273 Core::System& system;
274 VideoCore::RasterizerInterface& rasterizer; 274 VideoCore::RasterizerInterface& rasterizer;
diff --git a/src/video_core/rasterizer_cache.cpp b/src/video_core/rasterizer_cache.cpp
deleted file mode 100644
index 093b2cdf4..000000000
--- a/src/video_core/rasterizer_cache.cpp
+++ /dev/null
@@ -1,7 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/rasterizer_cache.h"
6
7RasterizerCacheObject::~RasterizerCacheObject() = default;
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
deleted file mode 100644
index 096ee337c..000000000
--- a/src/video_core/rasterizer_cache.h
+++ /dev/null
@@ -1,253 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <mutex>
8#include <set>
9#include <unordered_map>
10
11#include <boost/icl/interval_map.hpp>
12#include <boost/range/iterator_range_core.hpp>
13
14#include "common/common_types.h"
15#include "core/settings.h"
16#include "video_core/gpu.h"
17#include "video_core/rasterizer_interface.h"
18
19class RasterizerCacheObject {
20public:
21 explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
22
23 virtual ~RasterizerCacheObject();
24
25 VAddr GetCpuAddr() const {
26 return cpu_addr;
27 }
28
29 /// Gets the size of the shader in guest memory, required for cache management
30 virtual std::size_t GetSizeInBytes() const = 0;
31
32 /// Sets whether the cached object should be considered registered
33 void SetIsRegistered(bool registered) {
34 is_registered = registered;
35 }
36
37 /// Returns true if the cached object is registered
38 bool IsRegistered() const {
39 return is_registered;
40 }
41
42 /// Returns true if the cached object is dirty
43 bool IsDirty() const {
44 return is_dirty;
45 }
46
47 /// Returns ticks from when this cached object was last modified
48 u64 GetLastModifiedTicks() const {
49 return last_modified_ticks;
50 }
51
52 /// Marks an object as recently modified, used to specify whether it is clean or dirty
53 template <class T>
54 void MarkAsModified(bool dirty, T& cache) {
55 is_dirty = dirty;
56 last_modified_ticks = cache.GetModifiedTicks();
57 }
58
59 void SetMemoryMarked(bool is_memory_marked_) {
60 is_memory_marked = is_memory_marked_;
61 }
62
63 bool IsMemoryMarked() const {
64 return is_memory_marked;
65 }
66
67 void SetSyncPending(bool is_sync_pending_) {
68 is_sync_pending = is_sync_pending_;
69 }
70
71 bool IsSyncPending() const {
72 return is_sync_pending;
73 }
74
75private:
76 bool is_registered{}; ///< Whether the object is currently registered with the cache
77 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
78 bool is_memory_marked{}; ///< Whether the object is marking rasterizer memory.
79 bool is_sync_pending{}; ///< Whether the object is pending deletion.
80 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
81 VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space
82};
83
84template <class T>
85class RasterizerCache : NonCopyable {
86 friend class RasterizerCacheObject;
87
88public:
89 explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
90
91 /// Write any cached resources overlapping the specified region back to memory
92 void FlushRegion(VAddr addr, std::size_t size) {
93 std::lock_guard lock{mutex};
94
95 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
96 for (auto& object : objects) {
97 FlushObject(object);
98 }
99 }
100
101 /// Mark the specified region as being invalidated
102 void InvalidateRegion(VAddr addr, u64 size) {
103 std::lock_guard lock{mutex};
104
105 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
106 for (auto& object : objects) {
107 if (!object->IsRegistered()) {
108 // Skip duplicates
109 continue;
110 }
111 Unregister(object);
112 }
113 }
114
115 void OnCPUWrite(VAddr addr, std::size_t size) {
116 std::lock_guard lock{mutex};
117
118 for (const auto& object : GetSortedObjectsFromRegion(addr, size)) {
119 if (object->IsRegistered()) {
120 UnmarkMemory(object);
121 object->SetSyncPending(true);
122 marked_for_unregister.emplace_back(object);
123 }
124 }
125 }
126
127 void SyncGuestHost() {
128 std::lock_guard lock{mutex};
129
130 for (const auto& object : marked_for_unregister) {
131 if (object->IsRegistered()) {
132 object->SetSyncPending(false);
133 Unregister(object);
134 }
135 }
136 marked_for_unregister.clear();
137 }
138
139 /// Invalidates everything in the cache
140 void InvalidateAll() {
141 std::lock_guard lock{mutex};
142
143 while (interval_cache.begin() != interval_cache.end()) {
144 Unregister(*interval_cache.begin()->second.begin());
145 }
146 }
147
148protected:
149 /// Tries to get an object from the cache with the specified cache address
150 T TryGet(VAddr addr) const {
151 const auto iter = map_cache.find(addr);
152 if (iter != map_cache.end())
153 return iter->second;
154 return nullptr;
155 }
156
157 /// Register an object into the cache
158 virtual void Register(const T& object) {
159 std::lock_guard lock{mutex};
160
161 object->SetIsRegistered(true);
162 interval_cache.add({GetInterval(object), ObjectSet{object}});
163 map_cache.insert({object->GetCpuAddr(), object});
164 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
165 object->SetMemoryMarked(true);
166 }
167
168 /// Unregisters an object from the cache
169 virtual void Unregister(const T& object) {
170 std::lock_guard lock{mutex};
171
172 UnmarkMemory(object);
173 object->SetIsRegistered(false);
174 if (object->IsSyncPending()) {
175 marked_for_unregister.remove(object);
176 object->SetSyncPending(false);
177 }
178 const VAddr addr = object->GetCpuAddr();
179 interval_cache.subtract({GetInterval(object), ObjectSet{object}});
180 map_cache.erase(addr);
181 }
182
183 void UnmarkMemory(const T& object) {
184 if (!object->IsMemoryMarked()) {
185 return;
186 }
187 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
188 object->SetMemoryMarked(false);
189 }
190
191 /// Returns a ticks counter used for tracking when cached objects were last modified
192 u64 GetModifiedTicks() {
193 std::lock_guard lock{mutex};
194
195 return ++modified_ticks;
196 }
197
198 virtual void FlushObjectInner(const T& object) = 0;
199
200 /// Flushes the specified object, updating appropriate cache state as needed
201 void FlushObject(const T& object) {
202 std::lock_guard lock{mutex};
203
204 if (!object->IsDirty()) {
205 return;
206 }
207 FlushObjectInner(object);
208 object->MarkAsModified(false, *this);
209 }
210
211 std::recursive_mutex mutex;
212
213private:
214 /// Returns a list of cached objects from the specified memory region, ordered by access time
215 std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
216 if (size == 0) {
217 return {};
218 }
219
220 std::vector<T> objects;
221 const ObjectInterval interval{addr, addr + size};
222 for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) {
223 for (auto& cached_object : pair.second) {
224 if (!cached_object) {
225 continue;
226 }
227 objects.push_back(cached_object);
228 }
229 }
230
231 std::sort(objects.begin(), objects.end(), [](const T& a, const T& b) -> bool {
232 return a->GetLastModifiedTicks() < b->GetLastModifiedTicks();
233 });
234
235 return objects;
236 }
237
238 using ObjectSet = std::set<T>;
239 using ObjectCache = std::unordered_map<VAddr, T>;
240 using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
241 using ObjectInterval = typename IntervalCache::interval_type;
242
243 static auto GetInterval(const T& object) {
244 return ObjectInterval::right_open(object->GetCpuAddr(),
245 object->GetCpuAddr() + object->GetSizeInBytes());
246 }
247
248 ObjectCache map_cache;
249 IntervalCache interval_cache; ///< Cache of objects
250 u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
251 VideoCore::RasterizerInterface& rasterizer;
252 std::list<T> marked_for_unregister;
253};
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
new file mode 100644
index 000000000..eb5158407
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
@@ -0,0 +1,2073 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <cstddef>
8#include <string>
9#include <string_view>
10#include <utility>
11#include <variant>
12
13#include <fmt/format.h>
14
15#include "common/alignment.h"
16#include "common/assert.h"
17#include "common/common_types.h"
18#include "video_core/renderer_opengl/gl_arb_decompiler.h"
19#include "video_core/renderer_opengl/gl_device.h"
20#include "video_core/shader/registry.h"
21#include "video_core/shader/shader_ir.h"
22
23// Predicates in the decompiled code follow the convention that -1 means true and 0 means false.
24// GLASM lacks booleans, so they have to be implemented as integers.
25// Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can be used to
26// select between two values, because -1 will be evaluated as true and 0 as false.
27
28namespace OpenGL {
29
30namespace {
31
32using Tegra::Engines::ShaderType;
33using Tegra::Shader::Attribute;
34using Tegra::Shader::PixelImap;
35using Tegra::Shader::Register;
36using namespace VideoCommon::Shader;
37using Operation = const OperationNode&;
38
39constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"};
40
41char Swizzle(std::size_t component) {
42 ASSERT(component < 4);
43 return component["xyzw"];
44}
45
46constexpr bool IsGenericAttribute(Attribute::Index index) {
47 return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
48}
49
50u32 GetGenericAttributeIndex(Attribute::Index index) {
51 ASSERT(IsGenericAttribute(index));
52 return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
53}
54
55std::string_view Modifiers(Operation operation) {
56 const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta());
57 if (meta && meta->precise) {
58 return ".PREC";
59 }
60 return "";
61}
62
63std::string_view GetInputFlags(PixelImap attribute) {
64 switch (attribute) {
65 case PixelImap::Perspective:
66 return "";
67 case PixelImap::Constant:
68 return "FLAT ";
69 case PixelImap::ScreenLinear:
70 return "NOPERSPECTIVE ";
71 case PixelImap::Unused:
72 break;
73 }
74 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute));
75 return {};
76}
77
78std::string_view ImageType(Tegra::Shader::ImageType image_type) {
79 switch (image_type) {
80 case Tegra::Shader::ImageType::Texture1D:
81 return "1D";
82 case Tegra::Shader::ImageType::TextureBuffer:
83 return "BUFFER";
84 case Tegra::Shader::ImageType::Texture1DArray:
85 return "ARRAY1D";
86 case Tegra::Shader::ImageType::Texture2D:
87 return "2D";
88 case Tegra::Shader::ImageType::Texture2DArray:
89 return "ARRAY2D";
90 case Tegra::Shader::ImageType::Texture3D:
91 return "3D";
92 }
93 UNREACHABLE();
94 return {};
95}
96
97std::string_view StackName(MetaStackClass stack) {
98 switch (stack) {
99 case MetaStackClass::Ssy:
100 return "SSY";
101 case MetaStackClass::Pbk:
102 return "PBK";
103 }
104 UNREACHABLE();
105 return "";
106};
107
108std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
109 switch (topology) {
110 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
111 return "POINTS";
112 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
113 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
114 return "LINES";
115 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
116 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
117 return "LINES_ADJACENCY";
118 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
119 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
120 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
121 return "TRIANGLES";
122 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
123 case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
124 return "TRIANGLES_ADJACENCY";
125 default:
126 UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
127 return "POINTS";
128 }
129}
130
131std::string_view TopologyName(Tegra::Shader::OutputTopology topology) {
132 switch (topology) {
133 case Tegra::Shader::OutputTopology::PointList:
134 return "POINTS";
135 case Tegra::Shader::OutputTopology::LineStrip:
136 return "LINE_STRIP";
137 case Tegra::Shader::OutputTopology::TriangleStrip:
138 return "TRIANGLE_STRIP";
139 default:
140 UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology));
141 return "points";
142 }
143}
144
145std::string_view StageInputName(ShaderType stage) {
146 switch (stage) {
147 case ShaderType::Vertex:
148 case ShaderType::Geometry:
149 return "vertex";
150 case ShaderType::Fragment:
151 return "fragment";
152 case ShaderType::Compute:
153 return "invocation";
154 default:
155 UNREACHABLE();
156 return "";
157 }
158}
159
160std::string TextureType(const MetaTexture& meta) {
161 if (meta.sampler.is_buffer) {
162 return "BUFFER";
163 }
164 std::string type;
165 if (meta.sampler.is_shadow) {
166 type += "SHADOW";
167 }
168 if (meta.sampler.is_array) {
169 type += "ARRAY";
170 }
171 type += [&meta] {
172 switch (meta.sampler.type) {
173 case Tegra::Shader::TextureType::Texture1D:
174 return "1D";
175 case Tegra::Shader::TextureType::Texture2D:
176 return "2D";
177 case Tegra::Shader::TextureType::Texture3D:
178 return "3D";
179 case Tegra::Shader::TextureType::TextureCube:
180 return "CUBE";
181 }
182 UNREACHABLE();
183 return "2D";
184 }();
185 return type;
186}
187
188std::string GlobalMemoryName(const GlobalMemoryBase& base) {
189 return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset);
190}
191
192class ARBDecompiler final {
193public:
194 explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
195 ShaderType stage, std::string_view identifier);
196
197 std::string Code() const {
198 return shader_source;
199 }
200
201private:
202 void DeclareHeader();
203 void DeclareVertex();
204 void DeclareGeometry();
205 void DeclareFragment();
206 void DeclareCompute();
207 void DeclareInputAttributes();
208 void DeclareOutputAttributes();
209 void DeclareLocalMemory();
210 void DeclareGlobalMemory();
211 void DeclareConstantBuffers();
212 void DeclareRegisters();
213 void DeclareTemporaries();
214 void DeclarePredicates();
215 void DeclareInternalFlags();
216
217 void InitializeVariables();
218
219 void DecompileAST();
220 void DecompileBranchMode();
221
222 void VisitAST(const ASTNode& node);
223 std::string VisitExpression(const Expr& node);
224
225 void VisitBlock(const NodeBlock& bb);
226
227 std::string Visit(const Node& node);
228
229 std::pair<std::string, std::size_t> BuildCoords(Operation);
230 std::string BuildAoffi(Operation);
231 void Exit();
232
233 std::string Assign(Operation);
234 std::string Select(Operation);
235 std::string FClamp(Operation);
236 std::string FCastHalf0(Operation);
237 std::string FCastHalf1(Operation);
238 std::string FSqrt(Operation);
239 std::string FSwizzleAdd(Operation);
240 std::string HAdd2(Operation);
241 std::string HMul2(Operation);
242 std::string HFma2(Operation);
243 std::string HAbsolute(Operation);
244 std::string HNegate(Operation);
245 std::string HClamp(Operation);
246 std::string HCastFloat(Operation);
247 std::string HUnpack(Operation);
248 std::string HMergeF32(Operation);
249 std::string HMergeH0(Operation);
250 std::string HMergeH1(Operation);
251 std::string HPack2(Operation);
252 std::string LogicalAssign(Operation);
253 std::string LogicalPick2(Operation);
254 std::string LogicalAnd2(Operation);
255 std::string FloatOrdered(Operation);
256 std::string FloatUnordered(Operation);
257 std::string LogicalAddCarry(Operation);
258 std::string Texture(Operation);
259 std::string TextureGather(Operation);
260 std::string TextureQueryDimensions(Operation);
261 std::string TextureQueryLod(Operation);
262 std::string TexelFetch(Operation);
263 std::string TextureGradient(Operation);
264 std::string ImageLoad(Operation);
265 std::string ImageStore(Operation);
266 std::string Branch(Operation);
267 std::string BranchIndirect(Operation);
268 std::string PushFlowStack(Operation);
269 std::string PopFlowStack(Operation);
270 std::string Exit(Operation);
271 std::string Discard(Operation);
272 std::string EmitVertex(Operation);
273 std::string EndPrimitive(Operation);
274 std::string InvocationId(Operation);
275 std::string YNegate(Operation);
276 std::string ThreadId(Operation);
277 std::string ShuffleIndexed(Operation);
278 std::string Barrier(Operation);
279 std::string MemoryBarrierGroup(Operation);
280 std::string MemoryBarrierGlobal(Operation);
281
282 template <const std::string_view& op>
283 std::string Unary(Operation operation) {
284 std::string temporary = AllocTemporary();
285 AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]));
286 return temporary;
287 }
288
289 template <const std::string_view& op>
290 std::string Binary(Operation operation) {
291 std::string temporary = AllocTemporary();
292 AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
293 Visit(operation[1]));
294 return temporary;
295 }
296
297 template <const std::string_view& op>
298 std::string Trinary(Operation operation) {
299 std::string temporary = AllocTemporary();
300 AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]),
301 Visit(operation[1]), Visit(operation[2]));
302 return temporary;
303 }
304
305 template <const std::string_view& op, bool unordered>
306 std::string FloatComparison(Operation operation) {
307 std::string temporary = AllocTemporary();
308 AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation));
309 AddLine("MOV.S {}, 0;", temporary);
310 AddLine("MOV.S {} (NE.x), -1;", temporary);
311
312 const std::string op_a = Visit(operation[0]);
313 const std::string op_b = Visit(operation[1]);
314 if constexpr (unordered) {
315 AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
316 AddLine("TRUNC.U.CC RC.x, RC.x;");
317 AddLine("MOV.S {} (NE.x), -1;", temporary);
318 AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
319 AddLine("TRUNC.U.CC RC.x, RC.x;");
320 AddLine("MOV.S {} (NE.x), -1;", temporary);
321 } else if (op == SNE_F) {
322 AddLine("SNE.F RC.x, {}, {};", op_a, op_a);
323 AddLine("TRUNC.U.CC RC.x, RC.x;");
324 AddLine("MOV.S {} (NE.x), 0;", temporary);
325 AddLine("SNE.F RC.x, {}, {};", op_b, op_b);
326 AddLine("TRUNC.U.CC RC.x, RC.x;");
327 AddLine("MOV.S {} (NE.x), 0;", temporary);
328 }
329 return temporary;
330 }
331
332 template <const std::string_view& op, bool is_nan>
333 std::string HalfComparison(Operation operation) {
334 std::string tmp1 = AllocVectorTemporary();
335 const std::string tmp2 = AllocVectorTemporary();
336 const std::string op_a = Visit(operation[0]);
337 const std::string op_b = Visit(operation[1]);
338 AddLine("UP2H.F {}, {};", tmp1, op_a);
339 AddLine("UP2H.F {}, {};", tmp2, op_b);
340 AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2);
341 AddLine("TRUNC.U.CC RC.xy, {};", tmp1);
342 AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1);
343 AddLine("MOV.S {}.x (NE.x), -1;", tmp1);
344 AddLine("MOV.S {}.y (NE.y), -1;", tmp1);
345 if constexpr (is_nan) {
346 AddLine("MOVC.F RC.x, {};", op_a);
347 AddLine("MOV.S {}.x (NAN.x), -1;", tmp1);
348 AddLine("MOVC.F RC.x, {};", op_b);
349 AddLine("MOV.S {}.y (NAN.x), -1;", tmp1);
350 }
351 return tmp1;
352 }
353
354 template <const std::string_view& op, const std::string_view& type>
355 std::string AtomicImage(Operation operation) {
356 const auto& meta = std::get<MetaImage>(operation.GetMeta());
357 const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
358 const std::size_t num_coords = operation.GetOperandsCount();
359 const std::size_t num_values = meta.values.size();
360
361 const std::string coord = AllocVectorTemporary();
362 const std::string value = AllocVectorTemporary();
363 for (std::size_t i = 0; i < num_coords; ++i) {
364 AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
365 }
366 for (std::size_t i = 0; i < num_values; ++i) {
367 AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
368 }
369
370 AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, coord, value, coord,
371 image_id, ImageType(meta.image.type));
372 return fmt::format("{}.x", coord);
373 }
374
375 template <const std::string_view& op, const std::string_view& type>
376 std::string Atomic(Operation operation) {
377 std::string temporary = AllocTemporary();
378 std::string address;
379 std::string_view opname;
380 if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
381 AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
382 Visit(gmem->GetBaseAddress()));
383 address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary);
384 opname = "ATOMB";
385 } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
386 address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
387 opname = "ATOMS";
388 } else {
389 UNREACHABLE();
390 return "{0, 0, 0, 0}";
391 }
392 AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
393 return temporary;
394 }
395
396 template <char type>
397 std::string Negate(Operation operation) {
398 std::string temporary = AllocTemporary();
399 if constexpr (type == 'F') {
400 AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0]));
401 } else {
402 AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0]));
403 }
404 return temporary;
405 }
406
407 template <char type>
408 std::string Absolute(Operation operation) {
409 std::string temporary = AllocTemporary();
410 AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0]));
411 return temporary;
412 }
413
414 template <char type>
415 std::string BitfieldInsert(Operation operation) {
416 const std::string temporary = AllocVectorTemporary();
417 AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3]));
418 AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2]));
419 AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]),
420 Visit(operation[0]));
421 return fmt::format("{}.x", temporary);
422 }
423
424 template <char type>
425 std::string BitfieldExtract(Operation operation) {
426 const std::string temporary = AllocVectorTemporary();
427 AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2]));
428 AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1]));
429 AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0]));
430 return fmt::format("{}.x", temporary);
431 }
432
433 template <char swizzle>
434 std::string LocalInvocationId(Operation) {
435 return fmt::format("invocation.localid.{}", swizzle);
436 }
437
438 template <char swizzle>
439 std::string WorkGroupId(Operation) {
440 return fmt::format("invocation.groupid.{}", swizzle);
441 }
442
443 template <char c1, char c2>
444 std::string ThreadMask(Operation) {
445 return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2);
446 }
447
448 template <typename... Args>
449 void AddExpression(std::string_view text, Args&&... args) {
450 shader_source += fmt::format(text, std::forward<Args>(args)...);
451 }
452
453 template <typename... Args>
454 void AddLine(std::string_view text, Args&&... args) {
455 AddExpression(text, std::forward<Args>(args)...);
456 shader_source += '\n';
457 }
458
459 std::string AllocTemporary() {
460 max_temporaries = std::max(max_temporaries, num_temporaries + 1);
461 return fmt::format("T{}.x", num_temporaries++);
462 }
463
464 std::string AllocVectorTemporary() {
465 max_temporaries = std::max(max_temporaries, num_temporaries + 1);
466 return fmt::format("T{}", num_temporaries++);
467 }
468
469 void ResetTemporaries() noexcept {
470 num_temporaries = 0;
471 }
472
473 const Device& device;
474 const ShaderIR& ir;
475 const Registry& registry;
476 const ShaderType stage;
477
478 std::size_t num_temporaries = 0;
479 std::size_t max_temporaries = 0;
480
481 std::string shader_source;
482
483 static constexpr std::string_view ADD_F32 = "ADD.F32";
484 static constexpr std::string_view ADD_S = "ADD.S";
485 static constexpr std::string_view ADD_U = "ADD.U";
486 static constexpr std::string_view MUL_F32 = "MUL.F32";
487 static constexpr std::string_view MUL_S = "MUL.S";
488 static constexpr std::string_view MUL_U = "MUL.U";
489 static constexpr std::string_view DIV_F32 = "DIV.F32";
490 static constexpr std::string_view DIV_S = "DIV.S";
491 static constexpr std::string_view DIV_U = "DIV.U";
492 static constexpr std::string_view MAD_F32 = "MAD.F32";
493 static constexpr std::string_view RSQ_F32 = "RSQ.F32";
494 static constexpr std::string_view COS_F32 = "COS.F32";
495 static constexpr std::string_view SIN_F32 = "SIN.F32";
496 static constexpr std::string_view EX2_F32 = "EX2.F32";
497 static constexpr std::string_view LG2_F32 = "LG2.F32";
498 static constexpr std::string_view SLT_F = "SLT.F32";
499 static constexpr std::string_view SLT_S = "SLT.S";
500 static constexpr std::string_view SLT_U = "SLT.U";
501 static constexpr std::string_view SEQ_F = "SEQ.F32";
502 static constexpr std::string_view SEQ_S = "SEQ.S";
503 static constexpr std::string_view SEQ_U = "SEQ.U";
504 static constexpr std::string_view SLE_F = "SLE.F32";
505 static constexpr std::string_view SLE_S = "SLE.S";
506 static constexpr std::string_view SLE_U = "SLE.U";
507 static constexpr std::string_view SGT_F = "SGT.F32";
508 static constexpr std::string_view SGT_S = "SGT.S";
509 static constexpr std::string_view SGT_U = "SGT.U";
510 static constexpr std::string_view SNE_F = "SNE.F32";
511 static constexpr std::string_view SNE_S = "SNE.S";
512 static constexpr std::string_view SNE_U = "SNE.U";
513 static constexpr std::string_view SGE_F = "SGE.F32";
514 static constexpr std::string_view SGE_S = "SGE.S";
515 static constexpr std::string_view SGE_U = "SGE.U";
516 static constexpr std::string_view AND_S = "AND.S";
517 static constexpr std::string_view AND_U = "AND.U";
518 static constexpr std::string_view TRUNC_F = "TRUNC.F";
519 static constexpr std::string_view TRUNC_S = "TRUNC.S";
520 static constexpr std::string_view TRUNC_U = "TRUNC.U";
521 static constexpr std::string_view SHL_S = "SHL.S";
522 static constexpr std::string_view SHL_U = "SHL.U";
523 static constexpr std::string_view SHR_S = "SHR.S";
524 static constexpr std::string_view SHR_U = "SHR.U";
525 static constexpr std::string_view OR_S = "OR.S";
526 static constexpr std::string_view OR_U = "OR.U";
527 static constexpr std::string_view XOR_S = "XOR.S";
528 static constexpr std::string_view XOR_U = "XOR.U";
529 static constexpr std::string_view NOT_S = "NOT.S";
530 static constexpr std::string_view NOT_U = "NOT.U";
531 static constexpr std::string_view BTC_S = "BTC.S";
532 static constexpr std::string_view BTC_U = "BTC.U";
533 static constexpr std::string_view BTFM_S = "BTFM.S";
534 static constexpr std::string_view BTFM_U = "BTFM.U";
535 static constexpr std::string_view ROUND_F = "ROUND.F";
536 static constexpr std::string_view CEIL_F = "CEIL.F";
537 static constexpr std::string_view FLR_F = "FLR.F";
538 static constexpr std::string_view I2F_S = "I2F.S";
539 static constexpr std::string_view I2F_U = "I2F.U";
540 static constexpr std::string_view MIN_F = "MIN.F";
541 static constexpr std::string_view MIN_S = "MIN.S";
542 static constexpr std::string_view MIN_U = "MIN.U";
543 static constexpr std::string_view MAX_F = "MAX.F";
544 static constexpr std::string_view MAX_S = "MAX.S";
545 static constexpr std::string_view MAX_U = "MAX.U";
546 static constexpr std::string_view MOV_U = "MOV.U";
547 static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U";
548 static constexpr std::string_view TGALL_U = "TGALL.U";
549 static constexpr std::string_view TGANY_U = "TGANY.U";
550 static constexpr std::string_view TGEQ_U = "TGEQ.U";
551 static constexpr std::string_view EXCH = "EXCH";
552 static constexpr std::string_view ADD = "ADD";
553 static constexpr std::string_view MIN = "MIN";
554 static constexpr std::string_view MAX = "MAX";
555 static constexpr std::string_view AND = "AND";
556 static constexpr std::string_view OR = "OR";
557 static constexpr std::string_view XOR = "XOR";
558 static constexpr std::string_view U32 = "U32";
559 static constexpr std::string_view S32 = "S32";
560
561 static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount);
562 using DecompilerType = std::string (ARBDecompiler::*)(Operation);
563 static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = {
564 &ARBDecompiler::Assign,
565
566 &ARBDecompiler::Select,
567
568 &ARBDecompiler::Binary<ADD_F32>,
569 &ARBDecompiler::Binary<MUL_F32>,
570 &ARBDecompiler::Binary<DIV_F32>,
571 &ARBDecompiler::Trinary<MAD_F32>,
572 &ARBDecompiler::Negate<'F'>,
573 &ARBDecompiler::Absolute<'F'>,
574 &ARBDecompiler::FClamp,
575 &ARBDecompiler::FCastHalf0,
576 &ARBDecompiler::FCastHalf1,
577 &ARBDecompiler::Binary<MIN_F>,
578 &ARBDecompiler::Binary<MAX_F>,
579 &ARBDecompiler::Unary<COS_F32>,
580 &ARBDecompiler::Unary<SIN_F32>,
581 &ARBDecompiler::Unary<EX2_F32>,
582 &ARBDecompiler::Unary<LG2_F32>,
583 &ARBDecompiler::Unary<RSQ_F32>,
584 &ARBDecompiler::FSqrt,
585 &ARBDecompiler::Unary<ROUND_F>,
586 &ARBDecompiler::Unary<FLR_F>,
587 &ARBDecompiler::Unary<CEIL_F>,
588 &ARBDecompiler::Unary<TRUNC_F>,
589 &ARBDecompiler::Unary<I2F_S>,
590 &ARBDecompiler::Unary<I2F_U>,
591 &ARBDecompiler::FSwizzleAdd,
592
593 &ARBDecompiler::Binary<ADD_S>,
594 &ARBDecompiler::Binary<MUL_S>,
595 &ARBDecompiler::Binary<DIV_S>,
596 &ARBDecompiler::Negate<'S'>,
597 &ARBDecompiler::Absolute<'S'>,
598 &ARBDecompiler::Binary<MIN_S>,
599 &ARBDecompiler::Binary<MAX_S>,
600
601 &ARBDecompiler::Unary<TRUNC_S>,
602 &ARBDecompiler::Unary<MOV_U>,
603 &ARBDecompiler::Binary<SHL_S>,
604 &ARBDecompiler::Binary<SHR_U>,
605 &ARBDecompiler::Binary<SHR_S>,
606 &ARBDecompiler::Binary<AND_S>,
607 &ARBDecompiler::Binary<OR_S>,
608 &ARBDecompiler::Binary<XOR_S>,
609 &ARBDecompiler::Unary<NOT_S>,
610 &ARBDecompiler::BitfieldInsert<'S'>,
611 &ARBDecompiler::BitfieldExtract<'S'>,
612 &ARBDecompiler::Unary<BTC_S>,
613 &ARBDecompiler::Unary<BTFM_S>,
614
615 &ARBDecompiler::Binary<ADD_U>,
616 &ARBDecompiler::Binary<MUL_U>,
617 &ARBDecompiler::Binary<DIV_U>,
618 &ARBDecompiler::Binary<MIN_U>,
619 &ARBDecompiler::Binary<MAX_U>,
620 &ARBDecompiler::Unary<TRUNC_U>,
621 &ARBDecompiler::Unary<MOV_U>,
622 &ARBDecompiler::Binary<SHL_U>,
623 &ARBDecompiler::Binary<SHR_U>,
624 &ARBDecompiler::Binary<SHR_U>,
625 &ARBDecompiler::Binary<AND_U>,
626 &ARBDecompiler::Binary<OR_U>,
627 &ARBDecompiler::Binary<XOR_U>,
628 &ARBDecompiler::Unary<NOT_U>,
629 &ARBDecompiler::BitfieldInsert<'U'>,
630 &ARBDecompiler::BitfieldExtract<'U'>,
631 &ARBDecompiler::Unary<BTC_U>,
632 &ARBDecompiler::Unary<BTFM_U>,
633
634 &ARBDecompiler::HAdd2,
635 &ARBDecompiler::HMul2,
636 &ARBDecompiler::HFma2,
637 &ARBDecompiler::HAbsolute,
638 &ARBDecompiler::HNegate,
639 &ARBDecompiler::HClamp,
640 &ARBDecompiler::HCastFloat,
641 &ARBDecompiler::HUnpack,
642 &ARBDecompiler::HMergeF32,
643 &ARBDecompiler::HMergeH0,
644 &ARBDecompiler::HMergeH1,
645 &ARBDecompiler::HPack2,
646
647 &ARBDecompiler::LogicalAssign,
648 &ARBDecompiler::Binary<AND_U>,
649 &ARBDecompiler::Binary<OR_U>,
650 &ARBDecompiler::Binary<XOR_U>,
651 &ARBDecompiler::Unary<NOT_U>,
652 &ARBDecompiler::LogicalPick2,
653 &ARBDecompiler::LogicalAnd2,
654
655 &ARBDecompiler::FloatComparison<SLT_F, false>,
656 &ARBDecompiler::FloatComparison<SEQ_F, false>,
657 &ARBDecompiler::FloatComparison<SLE_F, false>,
658 &ARBDecompiler::FloatComparison<SGT_F, false>,
659 &ARBDecompiler::FloatComparison<SNE_F, false>,
660 &ARBDecompiler::FloatComparison<SGE_F, false>,
661 &ARBDecompiler::FloatOrdered,
662 &ARBDecompiler::FloatUnordered,
663 &ARBDecompiler::FloatComparison<SLT_F, true>,
664 &ARBDecompiler::FloatComparison<SEQ_F, true>,
665 &ARBDecompiler::FloatComparison<SLE_F, true>,
666 &ARBDecompiler::FloatComparison<SGT_F, true>,
667 &ARBDecompiler::FloatComparison<SNE_F, true>,
668 &ARBDecompiler::FloatComparison<SGE_F, true>,
669
670 &ARBDecompiler::Binary<SLT_S>,
671 &ARBDecompiler::Binary<SEQ_S>,
672 &ARBDecompiler::Binary<SLE_S>,
673 &ARBDecompiler::Binary<SGT_S>,
674 &ARBDecompiler::Binary<SNE_S>,
675 &ARBDecompiler::Binary<SGE_S>,
676
677 &ARBDecompiler::Binary<SLT_U>,
678 &ARBDecompiler::Binary<SEQ_U>,
679 &ARBDecompiler::Binary<SLE_U>,
680 &ARBDecompiler::Binary<SGT_U>,
681 &ARBDecompiler::Binary<SNE_U>,
682 &ARBDecompiler::Binary<SGE_U>,
683
684 &ARBDecompiler::LogicalAddCarry,
685
686 &ARBDecompiler::HalfComparison<SLT_F, false>,
687 &ARBDecompiler::HalfComparison<SEQ_F, false>,
688 &ARBDecompiler::HalfComparison<SLE_F, false>,
689 &ARBDecompiler::HalfComparison<SGT_F, false>,
690 &ARBDecompiler::HalfComparison<SNE_F, false>,
691 &ARBDecompiler::HalfComparison<SGE_F, false>,
692 &ARBDecompiler::HalfComparison<SLT_F, true>,
693 &ARBDecompiler::HalfComparison<SEQ_F, true>,
694 &ARBDecompiler::HalfComparison<SLE_F, true>,
695 &ARBDecompiler::HalfComparison<SGT_F, true>,
696 &ARBDecompiler::HalfComparison<SNE_F, true>,
697 &ARBDecompiler::HalfComparison<SGE_F, true>,
698
699 &ARBDecompiler::Texture,
700 &ARBDecompiler::Texture,
701 &ARBDecompiler::TextureGather,
702 &ARBDecompiler::TextureQueryDimensions,
703 &ARBDecompiler::TextureQueryLod,
704 &ARBDecompiler::TexelFetch,
705 &ARBDecompiler::TextureGradient,
706
707 &ARBDecompiler::ImageLoad,
708 &ARBDecompiler::ImageStore,
709
710 &ARBDecompiler::AtomicImage<ADD, U32>,
711 &ARBDecompiler::AtomicImage<AND, U32>,
712 &ARBDecompiler::AtomicImage<OR, U32>,
713 &ARBDecompiler::AtomicImage<XOR, U32>,
714 &ARBDecompiler::AtomicImage<EXCH, U32>,
715
716 &ARBDecompiler::Atomic<EXCH, U32>,
717 &ARBDecompiler::Atomic<ADD, U32>,
718 &ARBDecompiler::Atomic<MIN, U32>,
719 &ARBDecompiler::Atomic<MAX, U32>,
720 &ARBDecompiler::Atomic<AND, U32>,
721 &ARBDecompiler::Atomic<OR, U32>,
722 &ARBDecompiler::Atomic<XOR, U32>,
723
724 &ARBDecompiler::Atomic<EXCH, S32>,
725 &ARBDecompiler::Atomic<ADD, S32>,
726 &ARBDecompiler::Atomic<MIN, S32>,
727 &ARBDecompiler::Atomic<MAX, S32>,
728 &ARBDecompiler::Atomic<AND, S32>,
729 &ARBDecompiler::Atomic<OR, S32>,
730 &ARBDecompiler::Atomic<XOR, S32>,
731
732 &ARBDecompiler::Atomic<ADD, U32>,
733 &ARBDecompiler::Atomic<MIN, U32>,
734 &ARBDecompiler::Atomic<MAX, U32>,
735 &ARBDecompiler::Atomic<AND, U32>,
736 &ARBDecompiler::Atomic<OR, U32>,
737 &ARBDecompiler::Atomic<XOR, U32>,
738
739 &ARBDecompiler::Atomic<ADD, S32>,
740 &ARBDecompiler::Atomic<MIN, S32>,
741 &ARBDecompiler::Atomic<MAX, S32>,
742 &ARBDecompiler::Atomic<AND, S32>,
743 &ARBDecompiler::Atomic<OR, S32>,
744 &ARBDecompiler::Atomic<XOR, S32>,
745
746 &ARBDecompiler::Branch,
747 &ARBDecompiler::BranchIndirect,
748 &ARBDecompiler::PushFlowStack,
749 &ARBDecompiler::PopFlowStack,
750 &ARBDecompiler::Exit,
751 &ARBDecompiler::Discard,
752
753 &ARBDecompiler::EmitVertex,
754 &ARBDecompiler::EndPrimitive,
755
756 &ARBDecompiler::InvocationId,
757 &ARBDecompiler::YNegate,
758 &ARBDecompiler::LocalInvocationId<'x'>,
759 &ARBDecompiler::LocalInvocationId<'y'>,
760 &ARBDecompiler::LocalInvocationId<'z'>,
761 &ARBDecompiler::WorkGroupId<'x'>,
762 &ARBDecompiler::WorkGroupId<'y'>,
763 &ARBDecompiler::WorkGroupId<'z'>,
764
765 &ARBDecompiler::Unary<TGBALLOT_U>,
766 &ARBDecompiler::Unary<TGALL_U>,
767 &ARBDecompiler::Unary<TGANY_U>,
768 &ARBDecompiler::Unary<TGEQ_U>,
769
770 &ARBDecompiler::ThreadId,
771 &ARBDecompiler::ThreadMask<'e', 'q'>,
772 &ARBDecompiler::ThreadMask<'g', 'e'>,
773 &ARBDecompiler::ThreadMask<'g', 't'>,
774 &ARBDecompiler::ThreadMask<'l', 'e'>,
775 &ARBDecompiler::ThreadMask<'l', 't'>,
776 &ARBDecompiler::ShuffleIndexed,
777
778 &ARBDecompiler::Barrier,
779 &ARBDecompiler::MemoryBarrierGroup,
780 &ARBDecompiler::MemoryBarrierGlobal,
781 };
782};
783
// Decompiles the whole shader IR into an ARB/NV assembly program string.
// The main body is generated first (it decides how many temporaries and which
// resources are needed), then the declarations are emitted and the body is
// appended after them so every declaration precedes its use.
// NOTE(review): the `identifier` parameter is not used in this constructor.
ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
                             ShaderType stage, std::string_view identifier)
    : device{device}, ir{ir}, registry{registry}, stage{stage} {
    // RC is the scratch/condition-code register; FSWZA/FSWZB are the sign
    // tables read by the FSWZADD emulation (see FSwizzleAdd).
    AddLine("TEMP RC;");
    AddLine("TEMP FSWZA[4];");
    AddLine("TEMP FSWZB[4];");
    if (ir.IsDecompiled()) {
        DecompileAST();
    } else {
        DecompileBranchMode();
    }
    AddLine("END");

    // Stash the generated body, emit declarations into the now-empty
    // shader_source, then append the body back after them.
    const std::string code = std::move(shader_source);
    DeclareHeader();
    DeclareVertex();
    DeclareGeometry();
    DeclareFragment();
    DeclareCompute();
    DeclareInputAttributes();
    DeclareOutputAttributes();
    DeclareLocalMemory();
    DeclareGlobalMemory();
    DeclareConstantBuffers();
    DeclareRegisters();
    DeclareTemporaries();
    DeclarePredicates();
    DeclareInternalFlags();

    shader_source += code;
}
815
816std::string_view HeaderStageName(ShaderType stage) {
817 switch (stage) {
818 case ShaderType::Vertex:
819 return "vp";
820 case ShaderType::Geometry:
821 return "gp";
822 case ShaderType::Fragment:
823 return "fp";
824 case ShaderType::Compute:
825 return "cp";
826 default:
827 UNREACHABLE();
828 return "";
829 }
830}
831
832void ARBDecompiler::DeclareHeader() {
833 AddLine("!!NV{}5.0", HeaderStageName(stage));
834 // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D
835 AddLine("OPTION NV_internal;");
836 AddLine("OPTION NV_gpu_program_fp64;");
837 AddLine("OPTION NV_shader_storage_buffer;");
838 AddLine("OPTION NV_shader_thread_group;");
839 if (ir.UsesWarps() && device.HasWarpIntrinsics()) {
840 AddLine("OPTION NV_shader_thread_shuffle;");
841 }
842 if (stage == ShaderType::Vertex) {
843 if (device.HasNvViewportArray2()) {
844 AddLine("OPTION NV_viewport_array2;");
845 }
846 }
847 if (stage == ShaderType::Fragment) {
848 AddLine("OPTION ARB_draw_buffers;");
849 }
850 if (device.HasImageLoadFormatted()) {
851 AddLine("OPTION EXT_shader_image_load_formatted;");
852 }
853}
854
855void ARBDecompiler::DeclareVertex() {
856 if (stage != ShaderType::Vertex) {
857 return;
858 }
859 AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};");
860}
861
862void ARBDecompiler::DeclareGeometry() {
863 if (stage != ShaderType::Geometry) {
864 return;
865 }
866 const auto& info = registry.GetGraphicsInfo();
867 const auto& header = ir.GetHeader();
868 AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology));
869 AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology));
870 AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value());
871 AddLine("ATTRIB vertex_position = vertex.position;");
872}
873
874void ARBDecompiler::DeclareFragment() {
875 if (stage != ShaderType::Fragment) {
876 return;
877 }
878 AddLine("OUTPUT result_color7 = result.color[7];");
879 AddLine("OUTPUT result_color6 = result.color[6];");
880 AddLine("OUTPUT result_color5 = result.color[5];");
881 AddLine("OUTPUT result_color4 = result.color[4];");
882 AddLine("OUTPUT result_color3 = result.color[3];");
883 AddLine("OUTPUT result_color2 = result.color[2];");
884 AddLine("OUTPUT result_color1 = result.color[1];");
885 AddLine("OUTPUT result_color0 = result.color;");
886}
887
888void ARBDecompiler::DeclareCompute() {
889 if (stage != ShaderType::Compute) {
890 return;
891 }
892 const ComputeInfo& info = registry.GetComputeInfo();
893 AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1],
894 info.workgroup_size[2]);
895 if (info.shared_memory_size_in_words > 0) {
896 const u32 size_in_bytes = info.shared_memory_size_in_words * 4;
897 AddLine("SHARED_MEMORY {};", size_in_bytes);
898 AddLine("SHARED shared_mem[] = {{program.sharedmem}};");
899 }
900}
901
902void ARBDecompiler::DeclareInputAttributes() {
903 if (stage == ShaderType::Compute) {
904 return;
905 }
906 const std::string_view stage_name = StageInputName(stage);
907 for (const auto attribute : ir.GetInputAttributes()) {
908 if (!IsGenericAttribute(attribute)) {
909 continue;
910 }
911 const u32 index = GetGenericAttributeIndex(attribute);
912
913 std::string_view suffix;
914 if (stage == ShaderType::Fragment) {
915 const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)};
916 if (input_mode == PixelImap::Unused) {
917 return;
918 }
919 suffix = GetInputFlags(input_mode);
920 }
921 AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index,
922 index);
923 }
924}
925
926void ARBDecompiler::DeclareOutputAttributes() {
927 if (stage == ShaderType::Compute) {
928 return;
929 }
930 for (const auto attribute : ir.GetOutputAttributes()) {
931 if (!IsGenericAttribute(attribute)) {
932 continue;
933 }
934 const u32 index = GetGenericAttributeIndex(attribute);
935 AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index);
936 }
937}
938
939void ARBDecompiler::DeclareLocalMemory() {
940 u64 size = 0;
941 if (stage == ShaderType::Compute) {
942 size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
943 } else {
944 size = ir.GetHeader().GetLocalMemorySize();
945 }
946 if (size == 0) {
947 return;
948 }
949 const u64 element_count = Common::AlignUp(size, 4) / 4;
950 AddLine("TEMP lmem[{}];", element_count);
951}
952
953void ARBDecompiler::DeclareGlobalMemory() {
954 u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer;
955 for (const auto& pair : ir.GetGlobalMemory()) {
956 const auto& base = pair.first;
957 AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding);
958 ++binding;
959 }
960}
961
962void ARBDecompiler::DeclareConstantBuffers() {
963 u32 binding = 0;
964 for (const auto& cbuf : ir.GetConstantBuffers()) {
965 AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding);
966 ++binding;
967 }
968}
969
970void ARBDecompiler::DeclareRegisters() {
971 for (const u32 gpr : ir.GetRegisters()) {
972 AddLine("TEMP R{};", gpr);
973 }
974}
975
976void ARBDecompiler::DeclareTemporaries() {
977 for (std::size_t i = 0; i < max_temporaries; ++i) {
978 AddLine("TEMP T{};", i);
979 }
980}
981
982void ARBDecompiler::DeclarePredicates() {
983 for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
984 AddLine("TEMP P{};", static_cast<u64>(pred));
985 }
986}
987
988void ARBDecompiler::DeclareInternalFlags() {
989 for (const char* name : INTERNAL_FLAG_NAMES) {
990 AddLine("TEMP {};", name);
991 }
992}
993
994void ARBDecompiler::InitializeVariables() {
995 AddLine("MOV.F32 FSWZA[0], -1;");
996 AddLine("MOV.F32 FSWZA[1], 1;");
997 AddLine("MOV.F32 FSWZA[2], -1;");
998 AddLine("MOV.F32 FSWZA[3], 0;");
999 AddLine("MOV.F32 FSWZB[0], -1;");
1000 AddLine("MOV.F32 FSWZB[1], -1;");
1001 AddLine("MOV.F32 FSWZB[2], 1;");
1002 AddLine("MOV.F32 FSWZB[3], -1;");
1003
1004 if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) {
1005 AddLine("MOV.F result.position, {{0, 0, 0, 1}};");
1006 }
1007 for (const auto attribute : ir.GetOutputAttributes()) {
1008 if (!IsGenericAttribute(attribute)) {
1009 continue;
1010 }
1011 const u32 index = GetGenericAttributeIndex(attribute);
1012 AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index);
1013 }
1014 for (const u32 gpr : ir.GetRegisters()) {
1015 AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr);
1016 }
1017 for (const Tegra::Shader::Pred pred : ir.GetPredicates()) {
1018 AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred));
1019 }
1020}
1021
1022void ARBDecompiler::DecompileAST() {
1023 const u32 num_flow_variables = ir.GetASTNumVariables();
1024 for (u32 i = 0; i < num_flow_variables; ++i) {
1025 AddLine("TEMP F{};", i);
1026 }
1027 for (u32 i = 0; i < num_flow_variables; ++i) {
1028 AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i);
1029 }
1030
1031 InitializeVariables();
1032
1033 VisitAST(ir.GetASTProgram());
1034}
1035
// Unstructured path: emulates the guest program counter. PC holds the address
// of the current basic block and one big REP loop re-dispatches into the
// matching block every iteration until a RET terminates the program.
void ARBDecompiler::DecompileBranchMode() {
    static constexpr u32 FLOW_STACK_SIZE = 20;
    if (!ir.IsFlowStackDisabled()) {
        // SSY/PBK flow stacks (sync and break targets) plus their top indices.
        AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE);
        AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE);
        AddLine("TEMP SSY_TOP;");
        AddLine("TEMP PBK_TOP;");
    }

    AddLine("TEMP PC;");

    if (!ir.IsFlowStackDisabled()) {
        AddLine("MOV.U SSY_TOP.x, 0;");
        AddLine("MOV.U PBK_TOP.x, 0;");
    }

    InitializeVariables();

    const auto basic_block_end = ir.GetBasicBlocks().end();
    auto basic_block_it = ir.GetBasicBlocks().begin();
    // Execution starts at the first block's address.
    const u32 first_address = basic_block_it->first;
    AddLine("MOV.U PC.x, {};", first_address);

    AddLine("REP;");

    std::size_t num_blocks = 0;
    while (basic_block_it != basic_block_end) {
        const auto& [address, bb] = *basic_block_it;
        ++num_blocks;

        // Each block opens an IF taken only when PC matches its address; the
        // ELSE below chains the remaining blocks, and all IFs are closed
        // together after the loop.
        AddLine("SEQ.S.CC RC.x, PC.x, {};", address);
        AddLine("IF NE.x;");

        VisitBlock(bb);

        ++basic_block_it;

        if (basic_block_it != basic_block_end) {
            // Fall through: when the block does not end in an explicit branch,
            // point PC at the next block and restart the dispatch loop.
            const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]);
            if (!op || op->GetCode() != OperationCode::Branch) {
                const u32 next_address = basic_block_it->first;
                AddLine("MOV.U PC.x, {};", next_address);
                AddLine("CONT;");
            }
        }

        AddLine("ELSE;");
    }
    // Reached when PC matched no block: terminate.
    AddLine("RET;");
    // Close the nested IF/ELSE chain, one ENDIF per emitted block.
    while (num_blocks--) {
        AddLine("ENDIF;");
    }

    AddLine("ENDREP;");
}
1091
// Recursively emits code for one structured control flow (AST) node.
void ARBDecompiler::VisitAST(const ASTNode& node) {
    if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) {
        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
    } else if (const auto ast = std::get_if<ASTIfThen>(&*node->GetInnerData())) {
        // Evaluate the condition, then guard the body with IF NE.x / ENDIF.
        const std::string condition = VisitExpression(ast->condition);
        ResetTemporaries();

        AddLine("MOVC.U RC.x, {};", condition);
        AddLine("IF NE.x;");
        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
        AddLine("ENDIF;");
    } else if (const auto ast = std::get_if<ASTIfElse>(&*node->GetInnerData())) {
        // Else-branch of an enclosing conditional; the construct is closed by
        // the node that emitted the matching IF.
        AddLine("ELSE;");
        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
    } else if (const auto ast = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) {
        VisitBlock(ast->nodes);
    } else if (const auto ast = std::get_if<ASTVarSet>(&*node->GetInnerData())) {
        // Assigns a flow variable (F<n>) consumed by loop/branch conditions.
        AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition));
        ResetTemporaries();
    } else if (const auto ast = std::get_if<ASTDoWhile>(&*node->GetInnerData())) {
        // NOTE(review): the condition's code is emitted once, *before* REP;
        // only the resulting name is re-read inside the loop. That is fine
        // when the condition reduces to live storage (F variables, predicates)
        // but would test a stale value if it required fresh temporaries each
        // iteration — confirm the AST builder only produces the former here.
        const std::string condition = VisitExpression(ast->condition);
        ResetTemporaries();
        AddLine("REP;");
        for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) {
            VisitAST(current);
        }
        AddLine("MOVC.U RC.x, {};", condition);
        AddLine("BRK (NE.x);");
        AddLine("ENDREP;");
    } else if (const auto ast = std::get_if<ASTReturn>(&*node->GetInnerData())) {
        // Conditional return or discard; the guard is skipped entirely when
        // the condition is statically true.
        const bool is_true = ExprIsTrue(ast->condition);
        if (!is_true) {
            AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
            AddLine("IF NE.x;");
            ResetTemporaries();
        }
        if (ast->kills) {
            AddLine("KIL TR;");
        } else {
            Exit();
        }
        if (!is_true) {
            AddLine("ENDIF;");
        }
    } else if (const auto ast = std::get_if<ASTBreak>(&*node->GetInnerData())) {
        // Unconditional BRK when statically true, predicated BRK otherwise.
        if (ExprIsTrue(ast->condition)) {
            AddLine("BRK;");
        } else {
            AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition));
            AddLine("BRK (NE.x);");
            ResetTemporaries();
        }
    } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) {
        // Nothing to do
    } else {
        UNREACHABLE();
    }
}
1156
1157std::string ARBDecompiler::VisitExpression(const Expr& node) {
1158 if (const auto expr = std::get_if<ExprAnd>(&*node)) {
1159 std::string result = AllocTemporary();
1160 AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1),
1161 VisitExpression(expr->operand2));
1162 return result;
1163 }
1164 if (const auto expr = std::get_if<ExprOr>(&*node)) {
1165 std::string result = AllocTemporary();
1166 AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1),
1167 VisitExpression(expr->operand2));
1168 return result;
1169 }
1170 if (const auto expr = std::get_if<ExprNot>(&*node)) {
1171 std::string result = AllocTemporary();
1172 AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1));
1173 return result;
1174 }
1175 if (const auto expr = std::get_if<ExprPredicate>(&*node)) {
1176 return fmt::format("P{}.x", static_cast<u64>(expr->predicate));
1177 }
1178 if (const auto expr = std::get_if<ExprCondCode>(&*node)) {
1179 return Visit(ir.GetConditionCode(expr->cc));
1180 }
1181 if (const auto expr = std::get_if<ExprVar>(&*node)) {
1182 return fmt::format("F{}.x", expr->var_index);
1183 }
1184 if (const auto expr = std::get_if<ExprBoolean>(&*node)) {
1185 return expr->value ? "0xffffffff" : "0";
1186 }
1187 if (const auto expr = std::get_if<ExprGprEqual>(&*node)) {
1188 std::string result = AllocTemporary();
1189 AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value);
1190 return result;
1191 }
1192 UNREACHABLE();
1193 return "0";
1194}
1195
1196void ARBDecompiler::VisitBlock(const NodeBlock& bb) {
1197 for (const auto& node : bb) {
1198 Visit(node);
1199 }
1200}
1201
// Emits code for one IR node and returns the assembly expression (usually
// "<temp>.x" or a named binding) holding its value; statement-like nodes
// return an empty string.
std::string ARBDecompiler::Visit(const Node& node) {
    if (const auto operation = std::get_if<OperationNode>(&*node)) {
        if (const auto amend_index = operation->GetAmendIndex()) {
            // Amend nodes carry extra code that must run before the operation.
            Visit(ir.GetAmendNode(*amend_index));
        }
        // Dispatch through the opcode-indexed member function table.
        const std::size_t index = static_cast<std::size_t>(operation->GetCode());
        if (index >= OPERATION_DECOMPILERS.size()) {
            UNREACHABLE_MSG("Out of bounds operation: {}", index);
            return {};
        }
        const auto decompiler = OPERATION_DECOMPILERS[index];
        if (decompiler == nullptr) {
            UNREACHABLE_MSG("Undefined operation: {}", index);
            return {};
        }
        return (this->*decompiler)(*operation);
    }

    if (const auto gpr = std::get_if<GprNode>(&*node)) {
        const u32 index = gpr->GetIndex();
        if (index == Register::ZeroIndex) {
            // The zero register always reads as constant zero.
            return "{0, 0, 0, 0}.x";
        }
        return fmt::format("R{}.x", index);
    }

    if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
        return fmt::format("CV{}.x", cv->GetIndex());
    }

    if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
        // Immediates are materialized into a fresh temporary.
        std::string temporary = AllocTemporary();
        AddLine("MOV.U {}, {};", temporary, immediate->GetValue());
        return temporary;
    }

    if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
        // Predicates use the -1 (true) / 0 (false) integer encoding.
        std::string temporary = AllocTemporary();
        switch (const auto index = predicate->GetIndex(); index) {
        case Tegra::Shader::Pred::UnusedIndex:
            AddLine("MOV.S {}, -1;", temporary);
            break;
        case Tegra::Shader::Pred::NeverExecute:
            AddLine("MOV.S {}, 0;", temporary);
            break;
        default:
            AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index));
            break;
        }
        if (predicate->IsNegated()) {
            // CMP flips the encoding: negative (true) -> 0, otherwise -> -1.
            AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary);
        }
        return temporary;
    }

    if (const auto abuf = std::get_if<AbufNode>(&*node)) {
        if (abuf->IsPhysicalBuffer()) {
            UNIMPLEMENTED_MSG("Physical buffers are not implemented");
            return "{0, 0, 0, 0}.x";
        }

        // NOTE(review): this lambda is declared but never invoked below — the
        // geometry paths call Visit(abuf->GetBuffer()) directly instead.
        const auto buffer_index = [this, &abuf]() -> std::string {
            if (stage != ShaderType::Geometry) {
                return "";
            }
            return fmt::format("[{}]", Visit(abuf->GetBuffer()));
        };

        const Attribute::Index index = abuf->GetIndex();
        const u32 element = abuf->GetElement();
        const char swizzle = Swizzle(element);
        switch (index) {
        case Attribute::Index::Position: {
            // Geometry shaders address the position input per vertex.
            if (stage == ShaderType::Geometry) {
                return fmt::format("{}_position[{}].{}", StageInputName(stage),
                                   Visit(abuf->GetBuffer()), swizzle);
            } else {
                return fmt::format("{}.position.{}", StageInputName(stage), swizzle);
            }
        }
        case Attribute::Index::TessCoordInstanceIDVertexID:
            // Elements 2 and 3 hold the instance and vertex ids respectively.
            ASSERT(stage == ShaderType::Vertex);
            switch (element) {
            case 2:
                return "vertex.instance";
            case 3:
                return "vertex.id";
            }
            UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
            break;
        case Attribute::Index::PointCoord:
            switch (element) {
            case 0:
                return "fragment.pointcoord.x";
            case 1:
                return "fragment.pointcoord.y";
            }
            UNIMPLEMENTED();
            break;
        case Attribute::Index::FrontFacing: {
            ASSERT(stage == ShaderType::Fragment);
            ASSERT(element == 3);
            // Converts fragment.facing into the -1/0 boolean encoding
            // (presumably facing is positive for front faces — confirm
            // against the NV fragment program spec).
            const std::string temporary = AllocVectorTemporary();
            AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};");
            AddLine("MOV.U.CC RC.x, -RC;");
            AddLine("MOV.S {}.x, 0;", temporary);
            AddLine("MOV.S {}.x (NE.x), -1;", temporary);
            return fmt::format("{}.x", temporary);
        }
        default:
            if (IsGenericAttribute(index)) {
                if (stage == ShaderType::Geometry) {
                    return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index),
                                       Visit(abuf->GetBuffer()), swizzle);
                } else {
                    return fmt::format("{}.attrib[{}].{}", StageInputName(stage),
                                       GetGenericAttributeIndex(index), swizzle);
                }
            }
            UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast<int>(index));
            break;
        }
        // Unhandled attributes read as zero.
        return "{0, 0, 0, 0}.x";
    }

    if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
        // Constant buffer read: immediate offsets are inlined, dynamic ones
        // are evaluated into a register first.
        std::string offset_string;
        const auto& offset = cbuf->GetOffset();
        if (const auto imm = std::get_if<ImmediateNode>(&*offset)) {
            offset_string = std::to_string(imm->GetValue());
        } else {
            offset_string = Visit(offset);
        }
        std::string temporary = AllocTemporary();
        AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string);
        return temporary;
    }

    if (const auto gmem = std::get_if<GmemNode>(&*node)) {
        // Global memory load: rebase the address against the region base,
        // then load through the STORAGE binding. NOTE(review): the two Visit
        // calls are function arguments and therefore indeterminately
        // sequenced; the order their code is emitted in may vary between
        // host compilers.
        std::string temporary = AllocTemporary();
        AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
                Visit(gmem->GetBaseAddress()));
        AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()),
                temporary);
        return temporary;
    }

    if (const auto lmem = std::get_if<LmemNode>(&*node)) {
        // Local memory load; the byte address is converted to a word index.
        std::string temporary = Visit(lmem->GetAddress());
        AddLine("SHR.U {}, {}, 2;", temporary, temporary);
        AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary);
        return temporary;
    }

    if (const auto smem = std::get_if<SmemNode>(&*node)) {
        std::string temporary = Visit(smem->GetAddress());
        AddLine("LDS.U32 {}, shared_mem[{}];", temporary, temporary);
        return temporary;
    }

    if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
        const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
        return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
    }

    if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
        // Conditionally executed code wrapped in IF NE.x / ENDIF.
        if (const auto amend_index = conditional->GetAmendIndex()) {
            Visit(ir.GetAmendNode(*amend_index));
        }
        AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition()));
        AddLine("IF NE.x;");
        VisitBlock(conditional->GetCode());
        AddLine("ENDIF;");
        return {};
    }

    if (const auto cmt = std::get_if<CommentNode>(&*node)) {
        // Uncommenting this will generate invalid code. GLASM lacks comments.
        // AddLine("// {}", cmt->GetText());
        return {};
    }

    UNIMPLEMENTED();
    return {};
}
1387
1388std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) {
1389 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1390 UNIMPLEMENTED_IF(meta.sampler.is_indexed);
1391 UNIMPLEMENTED_IF(meta.sampler.is_shadow && meta.sampler.is_array &&
1392 meta.sampler.type == Tegra::Shader::TextureType::TextureCube);
1393
1394 const std::size_t count = operation.GetOperandsCount();
1395 std::string temporary = AllocVectorTemporary();
1396 std::size_t i = 0;
1397 for (; i < count; ++i) {
1398 AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
1399 }
1400 if (meta.sampler.is_array) {
1401 AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i++), Visit(meta.array));
1402 }
1403 if (meta.sampler.is_shadow) {
1404 AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i++), Visit(meta.depth_compare));
1405 }
1406 return {std::move(temporary), i};
1407}
1408
1409std::string ARBDecompiler::BuildAoffi(Operation operation) {
1410 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1411 if (meta.aoffi.empty()) {
1412 return {};
1413 }
1414 const std::string temporary = AllocVectorTemporary();
1415 std::size_t i = 0;
1416 for (auto& node : meta.aoffi) {
1417 AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node));
1418 }
1419 return fmt::format(", offset({})", temporary);
1420}
1421
// Emits the shader epilogue. Non-fragment stages simply return; fragment
// stages first copy the enabled color components (and optionally depth) from
// guest registers to the render target outputs.
void ARBDecompiler::Exit() {
    if (stage != ShaderType::Fragment) {
        AddLine("RET;");
        return;
    }

    // Registers the shader never wrote read as zero.
    const auto safe_get_register = [this](u32 reg) -> std::string {
        // TODO(Rodrigo): Replace with contains once C++20 releases
        const auto& used_registers = ir.GetRegisters();
        if (used_registers.find(reg) != used_registers.end()) {
            return fmt::format("R{}.x", reg);
        }
        return "{0, 0, 0, 0}.x";
    };

    const auto& header = ir.GetHeader();
    u32 current_reg = 0;
    // Color outputs are packed in ascending register order, one register per
    // enabled component of each render target.
    for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) {
        for (u32 component = 0; component < 4; ++component) {
            if (!header.ps.IsColorComponentOutputEnabled(rt, component)) {
                continue;
            }
            AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component),
                    safe_get_register(current_reg));
            ++current_reg;
        }
    }
    if (header.ps.omap.depth) {
        // NOTE(review): depth is read from the register one past the last
        // color output (current_reg + 1), i.e. a register is skipped —
        // confirm against the PS output map layout documentation.
        AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1));
    }

    AddLine("RET;");
}
1455
// Stores operation[1] (source) into the location described by operation[0]
// (destination). Assignments produce no value, so an empty string is returned.
std::string ARBDecompiler::Assign(Operation operation) {
    const Node& dest = operation[0];
    const Node& src = operation[1];

    std::string dest_name;
    if (const auto gpr = std::get_if<GprNode>(&*dest)) {
        if (gpr->GetIndex() == Register::ZeroIndex) {
            // Writing to Register::ZeroIndex is a no op
            return {};
        }
        dest_name = fmt::format("R{}.x", gpr->GetIndex());
    } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
        // Output attribute store: map the attribute index/element to the
        // corresponding result.* binding.
        const u32 element = abuf->GetElement();
        const char swizzle = Swizzle(element);
        switch (const Attribute::Index index = abuf->GetIndex()) {
        case Attribute::Index::Position:
            dest_name = fmt::format("result.position.{}", swizzle);
            break;
        case Attribute::Index::LayerViewportPointSize:
            // Element 1 is the layer, 2 the viewport index, 3 the point size.
            switch (element) {
            case 0:
                UNIMPLEMENTED();
                return {};
            case 1:
            case 2:
                // Layer/viewport writes require NV_viewport_array2.
                if (!device.HasNvViewportArray2()) {
                    LOG_ERROR(
                        Render_OpenGL,
                        "NV_viewport_array2 is missing. Maxwell gen 2 or better is required.");
                    return {};
                }
                dest_name = element == 1 ? "result.layer.x" : "result.viewport.x";
                break;
            case 3:
                dest_name = "result.pointsize.x";
                break;
            }
            break;
        case Attribute::Index::ClipDistances0123:
            dest_name = fmt::format("result.clip[{}].x", element);
            break;
        case Attribute::Index::ClipDistances4567:
            dest_name = fmt::format("result.clip[{}].x", element + 4);
            break;
        default:
            if (!IsGenericAttribute(index)) {
                UNREACHABLE();
                return {};
            }
            dest_name =
                fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle);
            break;
        }
    } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
        // Local memory store: the SHR converts the byte address to a word
        // index by rewriting the temporary register named by `address` in the
        // generated code (the C++ string itself is unchanged).
        const std::string address = Visit(lmem->GetAddress());
        AddLine("SHR.U {}, {}, 2;", address, address);
        dest_name = fmt::format("lmem[{}].x", address);
    } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
        AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress()));
        ResetTemporaries();
        return {};
    } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
        // Global memory store: rebase the address, then store through the
        // STORAGE binding.
        const std::string temporary = AllocTemporary();
        AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()),
                Visit(gmem->GetBaseAddress()));
        AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()),
                temporary);
        ResetTemporaries();
        return {};
    } else {
        UNREACHABLE();
        ResetTemporaries();
        return {};
    }

    AddLine("MOV.U {}, {};", dest_name, Visit(src));
    ResetTemporaries();
    return {};
}
1535
1536std::string ARBDecompiler::Select(Operation operation) {
1537 std::string temporary = AllocTemporary();
1538 AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]),
1539 Visit(operation[2]));
1540 return temporary;
1541}
1542
1543std::string ARBDecompiler::FClamp(Operation operation) {
1544 // 1.0f in hex, replace with std::bit_cast on C++20
1545 static constexpr u32 POSITIVE_ONE = 0x3f800000;
1546
1547 std::string temporary = AllocTemporary();
1548 const Node& value = operation[0];
1549 const Node& low = operation[1];
1550 const Node& high = operation[2];
1551 const auto* const imm_low = std::get_if<ImmediateNode>(&*low);
1552 const auto* const imm_high = std::get_if<ImmediateNode>(&*high);
1553 if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) {
1554 AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value));
1555 } else {
1556 AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high));
1557 AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low));
1558 }
1559 return temporary;
1560}
1561
1562std::string ARBDecompiler::FCastHalf0(Operation operation) {
1563 const std::string temporary = AllocVectorTemporary();
1564 AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0]));
1565 return fmt::format("{}.x", temporary);
1566}
1567
1568std::string ARBDecompiler::FCastHalf1(Operation operation) {
1569 const std::string temporary = AllocVectorTemporary();
1570 AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0]));
1571 AddLine("MOV {}.x, {}.y;", temporary, temporary);
1572 return fmt::format("{}.x", temporary);
1573}
1574
1575std::string ARBDecompiler::FSqrt(Operation operation) {
1576 std::string temporary = AllocTemporary();
1577 AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0]));
1578 AddLine("RCP.F32 {}, {};", temporary, temporary);
1579 return temporary;
1580}
1581
1582std::string ARBDecompiler::FSwizzleAdd(Operation operation) {
1583 const std::string temporary = AllocVectorTemporary();
1584 if (!device.HasWarpIntrinsics()) {
1585 LOG_ERROR(Render_OpenGL,
1586 "NV_shader_thread_shuffle is missing. Kepler or better is required.");
1587 AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]));
1588 return fmt::format("{}.x", temporary);
1589 }
1590
1591 AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage));
1592 AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary);
1593 AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary);
1594 AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary);
1595 AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary);
1596 AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary);
1597 AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary);
1598 return fmt::format("{}.x", temporary);
1599}
1600
1601std::string ARBDecompiler::HAdd2(Operation operation) {
1602 const std::string tmp1 = AllocVectorTemporary();
1603 const std::string tmp2 = AllocVectorTemporary();
1604 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1605 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1606 AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2);
1607 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1608 return fmt::format("{}.x", tmp1);
1609}
1610
1611std::string ARBDecompiler::HMul2(Operation operation) {
1612 const std::string tmp1 = AllocVectorTemporary();
1613 const std::string tmp2 = AllocVectorTemporary();
1614 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1615 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1616 AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2);
1617 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1618 return fmt::format("{}.x", tmp1);
1619}
1620
1621std::string ARBDecompiler::HFma2(Operation operation) {
1622 const std::string tmp1 = AllocVectorTemporary();
1623 const std::string tmp2 = AllocVectorTemporary();
1624 const std::string tmp3 = AllocVectorTemporary();
1625 AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
1626 AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1]));
1627 AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2]));
1628 AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3);
1629 AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
1630 return fmt::format("{}.x", tmp1);
1631}
1632
1633std::string ARBDecompiler::HAbsolute(Operation operation) {
1634 const std::string temporary = AllocVectorTemporary();
1635 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1636 AddLine("PK2H.F {}.x, |{}|;", temporary, temporary);
1637 return fmt::format("{}.x", temporary);
1638}
1639
// Conditionally negates each half of a packed half2. operation[1] and
// operation[2] select, per component, whether that half is negated; each is
// tested through the RC condition register with a (NE.x) guarded move.
std::string ARBDecompiler::HNegate(Operation operation) {
    const std::string temporary = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
    AddLine("MOVC.S RC.x, {};", Visit(operation[1]));
    AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary);
    AddLine("MOVC.S RC.x, {};", Visit(operation[2]));
    AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary);
    AddLine("PK2H.F {}.x, {};", temporary, temporary);
    return fmt::format("{}.x", temporary);
}
1650
// Clamps both halves of a packed half2 to [operation[1], operation[2]].
// Each scalar bound is splatted to .xy so MAX/MIN apply per component.
std::string ARBDecompiler::HClamp(Operation operation) {
    const std::string tmp1 = AllocVectorTemporary();
    const std::string tmp2 = AllocVectorTemporary();
    AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0]));
    // Splat the lower bound and clamp from below.
    AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1]));
    AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
    AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2);
    // Splat the upper bound and clamp from above.
    AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2]));
    AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2);
    AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2);
    AddLine("PK2H.F {}.x, {};", tmp1, tmp1);
    return fmt::format("{}.x", tmp1);
}
1664
1665std::string ARBDecompiler::HCastFloat(Operation operation) {
1666 const std::string temporary = AllocVectorTemporary();
1667 AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary);
1668 AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0]));
1669 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1670 return fmt::format("{}.x", temporary);
1671}
1672
// Normalizes a value into packed half2 layout according to the IR's HalfType
// selector: pass through as-is, duplicate an f32 into both halves, or
// broadcast one half of an existing pair into both components.
std::string ARBDecompiler::HUnpack(Operation operation) {
    const std::string operand = Visit(operation[0]);
    switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
    case Tegra::Shader::HalfType::H0_H1:
        // Already in packed half2 layout.
        return operand;
    case Tegra::Shader::HalfType::F32: {
        // Duplicate the 32-bit value into both components and pack.
        const std::string temporary = AllocVectorTemporary();
        AddLine("MOV.U {}.x, {};", temporary, operand);
        AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
        AddLine("PK2H.F {}.x, {};", temporary, temporary);
        return fmt::format("{}.x", temporary);
    }
    case Tegra::Shader::HalfType::H0_H0: {
        // Broadcast the low half into both components.
        const std::string temporary = AllocVectorTemporary();
        AddLine("UP2H.F {}.xy, {};", temporary, operand);
        AddLine("MOV.U {}.y, {}.x;", temporary, temporary);
        AddLine("PK2H.F {}.x, {};", temporary, temporary);
        return fmt::format("{}.x", temporary);
    }
    case Tegra::Shader::HalfType::H1_H1: {
        // Broadcast the high half into both components.
        const std::string temporary = AllocVectorTemporary();
        AddLine("UP2H.F {}.xy, {};", temporary, operand);
        AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
        AddLine("PK2H.F {}.x, {};", temporary, temporary);
        return fmt::format("{}.x", temporary);
    }
    }
    UNREACHABLE();
    return "{0, 0, 0, 0}.x";
}
1703
1704std::string ARBDecompiler::HMergeF32(Operation operation) {
1705 const std::string temporary = AllocVectorTemporary();
1706 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1707 return fmt::format("{}.x", temporary);
1708}
1709
1710std::string ARBDecompiler::HMergeH0(Operation operation) {
1711 const std::string temporary = AllocVectorTemporary();
1712 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1713 AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
1714 AddLine("MOV.U {}.x, {}.z;", temporary, temporary);
1715 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1716 return fmt::format("{}.x", temporary);
1717}
1718
1719std::string ARBDecompiler::HMergeH1(Operation operation) {
1720 const std::string temporary = AllocVectorTemporary();
1721 AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0]));
1722 AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1]));
1723 AddLine("MOV.U {}.y, {}.w;", temporary, temporary);
1724 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1725 return fmt::format("{}.x", temporary);
1726}
1727
1728std::string ARBDecompiler::HPack2(Operation operation) {
1729 const std::string temporary = AllocVectorTemporary();
1730 AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0]));
1731 AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1]));
1732 AddLine("PK2H.F {}.x, {};", temporary, temporary);
1733 return fmt::format("{}.x", temporary);
1734}
1735
1736std::string ARBDecompiler::LogicalAssign(Operation operation) {
1737 const Node& dest = operation[0];
1738 const Node& src = operation[1];
1739
1740 std::string target;
1741
1742 if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
1743 ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
1744
1745 const Tegra::Shader::Pred index = pred->GetIndex();
1746 switch (index) {
1747 case Tegra::Shader::Pred::NeverExecute:
1748 case Tegra::Shader::Pred::UnusedIndex:
1749 // Writing to these predicates is a no-op
1750 return {};
1751 }
1752 target = fmt::format("P{}.x", static_cast<u64>(index));
1753 } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) {
1754 const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag());
1755 target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]);
1756 } else {
1757 UNREACHABLE();
1758 ResetTemporaries();
1759 return {};
1760 }
1761
1762 AddLine("MOV.U {}, {};", target, Visit(src));
1763 ResetTemporaries();
1764 return {};
1765}
1766
1767std::string ARBDecompiler::LogicalPick2(Operation operation) {
1768 std::string temporary = AllocTemporary();
1769 const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue();
1770 AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index));
1771 return temporary;
1772}
1773
1774std::string ARBDecompiler::LogicalAnd2(Operation operation) {
1775 std::string temporary = AllocTemporary();
1776 const std::string op = Visit(operation[0]);
1777 AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op);
1778 return temporary;
1779}
1780
1781std::string ARBDecompiler::FloatOrdered(Operation operation) {
1782 std::string temporary = AllocTemporary();
1783 AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
1784 AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
1785 AddLine("MOV.S {}, -1;", temporary);
1786 AddLine("MOV.S {} (NAN.x), 0;", temporary);
1787 AddLine("MOV.S {} (NAN.y), 0;", temporary);
1788 return temporary;
1789}
1790
1791std::string ARBDecompiler::FloatUnordered(Operation operation) {
1792 std::string temporary = AllocTemporary();
1793 AddLine("MOVC.F32 RC.x, {};", Visit(operation[0]));
1794 AddLine("MOVC.F32 RC.y, {};", Visit(operation[1]));
1795 AddLine("MOV.S {}, 0;", temporary);
1796 AddLine("MOV.S {} (NAN.x), -1;", temporary);
1797 AddLine("MOV.S {} (NAN.y), -1;", temporary);
1798 return temporary;
1799}
1800
1801std::string ARBDecompiler::LogicalAddCarry(Operation operation) {
1802 std::string temporary = AllocTemporary();
1803 AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1]));
1804 AddLine("MOV.S {}, 0;", temporary);
1805 AddLine("IF CF.x;");
1806 AddLine("MOV.S {}, -1;", temporary);
1807 AddLine("ENDIF;");
1808 return temporary;
1809}
1810
// Emits a texture sample. Defaults to TEX, switching to TXB when the IR
// carries a bias and to TXL when it carries an explicit LOD (the two are
// mutually exclusive, asserted below). If the coordinate vector has a free
// component the extra value is packed into .w; otherwise it is emitted as a
// separate scalar argument ("extra").
std::string ARBDecompiler::Texture(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
    const auto [temporary, swizzle] = BuildCoords(operation);

    std::string_view opcode = "TEX";
    std::string extra;
    if (meta.bias) {
        ASSERT(!meta.lod);
        opcode = "TXB";

        if (swizzle < 4) {
            AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias));
        } else {
            const std::string bias = AllocTemporary();
            AddLine("MOV.F {}, {};", bias, Visit(meta.bias));
            extra = fmt::format(" {},", bias);
        }
    }
    if (meta.lod) {
        ASSERT(!meta.bias);
        opcode = "TXL";

        if (swizzle < 4) {
            AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
        } else {
            const std::string lod = AllocTemporary();
            AddLine("MOV.F {}, {};", lod, Visit(meta.lod));
            extra = fmt::format(" {},", lod);
        }
    }

    AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, temporary, extra, sampler_id,
            TextureType(meta), BuildAoffi(operation));
    // Select the requested result component into .x for the caller.
    AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}
1848
// Emits a four-texel gather (TXG). For non-shadow samplers the component to
// gather is selected by an immediate from the IR; shadow samplers take no
// component selector.
std::string ARBDecompiler::TextureGather(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
    const auto [temporary, swizzle] = BuildCoords(operation);

    std::string comp;
    if (!meta.sampler.is_shadow) {
        const auto& immediate = std::get<ImmediateNode>(*meta.component);
        comp = fmt::format(".{}", Swizzle(immediate.GetValue()));
    }

    AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp,
            TextureType(meta), BuildAoffi(operation));
    // Select the requested result component into .x for the caller.
    AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}
1865
// Queries the dimensions of a texture level (TXQ). operation[0], when
// present, is the level of detail to query; it defaults to 0. Array
// samplers are not supported here.
std::string ARBDecompiler::TextureQueryDimensions(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const std::string temporary = AllocVectorTemporary();
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;

    ASSERT(!meta.sampler.is_array);

    const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0";
    AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta));
    AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}
1878
// Queries the LOD that would be sampled at the given coordinates (LOD
// instruction). The result is scaled by 256 and truncated to an integer —
// presumably to match the fixed-point LOD format the guest expects; confirm
// against the IR consumer. Array samplers are not supported here.
std::string ARBDecompiler::TextureQueryLod(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const std::string temporary = AllocVectorTemporary();
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;

    ASSERT(!meta.sampler.is_array);

    // Assemble the coordinate vector from the operation's operands.
    const std::size_t count = operation.GetOperandsCount();
    for (std::size_t i = 0; i < count; ++i) {
        AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
    }
    AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta));
    AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary);
    AddLine("TRUNC.S {}, {};", temporary, temporary);
    AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}
1896
// Emits an unfiltered texel fetch (TXF). Non-buffer textures take an
// explicit LOD, which is stored in the .w coordinate component (asserted to
// be free).
std::string ARBDecompiler::TexelFetch(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
    const auto [temporary, swizzle] = BuildCoords(operation);

    if (!meta.sampler.is_buffer) {
        ASSERT(swizzle < 4);
        AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod));
    }
    AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, temporary, sampler_id, TextureType(meta),
            BuildAoffi(operation));
    AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}
1911
// Emits a texture sample with explicit derivatives (TXD). meta.derivates
// interleaves ddx/ddy values per coordinate component. The coordinate
// register is reused to hold the sampled result.
std::string ARBDecompiler::TextureGradient(Operation operation) {
    const auto& meta = std::get<MetaTexture>(operation.GetMeta());
    const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index;
    const std::string ddx = AllocVectorTemporary();
    const std::string ddy = AllocVectorTemporary();
    const std::string coord = BuildCoords(operation).first;

    // Split the interleaved derivative list into separate ddx/ddy vectors.
    const std::size_t num_components = meta.derivates.size() / 2;
    for (std::size_t index = 0; index < num_components; ++index) {
        const char swizzle = Swizzle(index);
        AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2]));
        AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1]));
    }

    const std::string_view result = coord;
    AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id,
            TextureType(meta), BuildAoffi(operation));
    AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element));
    return fmt::format("{}.x", result);
}
1932
// Loads a texel from a writable image (LOADIM) using integer coordinates
// assembled from the operation's operands.
std::string ARBDecompiler::ImageLoad(Operation operation) {
    const auto& meta = std::get<MetaImage>(operation.GetMeta());
    const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
    const std::size_t count = operation.GetOperandsCount();
    const std::string_view type = ImageType(meta.image.type);

    const std::string temporary = AllocVectorTemporary();
    for (std::size_t i = 0; i < count; ++i) {
        AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i]));
    }
    AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type);
    AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element));
    return fmt::format("{}.x", temporary);
}
1947
// Stores a texel to a writable image (STOREIM). Integer coordinates come
// from the operation's operands; the values to write come from meta.values.
std::string ARBDecompiler::ImageStore(Operation operation) {
    const auto& meta = std::get<MetaImage>(operation.GetMeta());
    const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index;
    const std::size_t num_coords = operation.GetOperandsCount();
    const std::size_t num_values = meta.values.size();
    const std::string_view type = ImageType(meta.image.type);

    const std::string coord = AllocVectorTemporary();
    const std::string value = AllocVectorTemporary();
    for (std::size_t i = 0; i < num_coords; ++i) {
        AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i]));
    }
    for (std::size_t i = 0; i < num_values; ++i) {
        AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i]));
    }
    AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type);
    return {};
}
1966
1967std::string ARBDecompiler::Branch(Operation operation) {
1968 const auto target = std::get<ImmediateNode>(*operation[0]);
1969 AddLine("MOV.U PC.x, {};", target.GetValue());
1970 AddLine("CONT;");
1971 return {};
1972}
1973
// Branch to a runtime-computed address: store it in the emulated program
// counter and restart the dispatch loop.
std::string ARBDecompiler::BranchIndirect(Operation operation) {
    AddLine("MOV.U PC.x, {};", Visit(operation[0]));
    AddLine("CONT;");
    return {};
}
1979
1980std::string ARBDecompiler::PushFlowStack(Operation operation) {
1981 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
1982 const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue();
1983 const std::string_view stack_name = StackName(stack);
1984 AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target);
1985 AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
1986 return {};
1987}
1988
1989std::string ARBDecompiler::PopFlowStack(Operation operation) {
1990 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
1991 const std::string_view stack_name = StackName(stack);
1992 AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name);
1993 AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name);
1994 AddLine("CONT;");
1995 return {};
1996}
1997
// IR Exit operation: delegates to the parameterless Exit() overload.
std::string ARBDecompiler::Exit(Operation) {
    Exit();
    return {};
}
2002
// Unconditionally discards the fragment (KIL guarded by the always-true
// condition code).
std::string ARBDecompiler::Discard(Operation) {
    AddLine("KIL TR;");
    return {};
}
2007
// Emits a vertex from a geometry shader (EMIT).
std::string ARBDecompiler::EmitVertex(Operation) {
    AddLine("EMIT;");
    return {};
}
2012
// Ends the current geometry shader output primitive (ENDPRIM).
std::string ARBDecompiler::EndPrimitive(Operation) {
    AddLine("ENDPRIM;");
    return {};
}
2017
// Returns the primitive invocation input register.
std::string ARBDecompiler::InvocationId(Operation) {
    return "primitive.invocation";
}
2021
// Stub: Y-negation is not implemented for this backend, so the factor is
// always 1.0 (no flip).
std::string ARBDecompiler::YNegate(Operation) {
    LOG_WARNING(Render_OpenGL, "(STUBBED)");
    const std::string temporary = AllocTemporary();
    AddLine("MOV.F {}, 1;", temporary);
    return temporary;
}
2028
// Returns the thread-id input attribute for the current stage.
std::string ARBDecompiler::ThreadId(Operation) {
    return fmt::format("{}.threadid", StageInputName(stage));
}
2032
// Warp shuffle with a runtime lane index (SHFIDX) over the full 32-lane
// width ({31, 0, 0, 0} mask). Falls back to the unshuffled value when
// NV_shader_thread_shuffle is unavailable.
std::string ARBDecompiler::ShuffleIndexed(Operation operation) {
    if (!device.HasWarpIntrinsics()) {
        LOG_ERROR(Render_OpenGL,
                  "NV_shader_thread_shuffle is missing. Kepler or better is required.");
        return Visit(operation[0]);
    }
    const std::string temporary = AllocVectorTemporary();
    AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]),
            Visit(operation[1]));
    // NOTE(review): the shuffled value is taken from .y here — confirm the
    // SHFIDX result layout against NV_shader_thread_shuffle.
    AddLine("MOV.U {}.x, {}.y;", temporary, temporary);
    return fmt::format("{}.x", temporary);
}
2045
// Emits a thread barrier (BAR). Only emitted when the shader's control flow
// was fully decompiled; otherwise the barrier is dropped with an error.
std::string ARBDecompiler::Barrier(Operation) {
    if (!ir.IsDecompiled()) {
        LOG_ERROR(Render_OpenGL, "BAR used but shader is not decompiled");
        return {};
    }
    AddLine("BAR;");
    return {};
}
2054
// Memory barrier scoped to the thread group (MEMBAR.CTA).
std::string ARBDecompiler::MemoryBarrierGroup(Operation) {
    AddLine("MEMBAR.CTA;");
    return {};
}
2059
// Device-wide memory barrier (MEMBAR).
std::string ARBDecompiler::MemoryBarrierGlobal(Operation) {
    AddLine("MEMBAR;");
    return {};
}
2064
2065} // Anonymous namespace
2066
// Public entry point: decompiles the given shader IR into an ARB assembly
// program string for the requested pipeline stage. The identifier is
// embedded for debugging/identification purposes.
std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
                                    const VideoCommon::Shader::Registry& registry,
                                    Tegra::Engines::ShaderType stage, std::string_view identifier) {
    return ARBDecompiler(device, ir, registry, stage, identifier).Code();
}
2072
2073} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h
new file mode 100644
index 000000000..6afc87220
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.h
@@ -0,0 +1,29 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8#include <string_view>
9
10#include "common/common_types.h"
11
12namespace Tegra::Engines {
13enum class ShaderType : u32;
14}
15
16namespace VideoCommon::Shader {
17class ShaderIR;
18class Registry;
19} // namespace VideoCommon::Shader
20
21namespace OpenGL {
22
23class Device;
24
25std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
26 const VideoCommon::Shader::Registry& registry,
27 Tegra::Engines::ShaderType stage, std::string_view identifier);
28
29} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 9964ea894..d9f7b4cc6 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -22,22 +22,46 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
22 22
23MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); 23MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
24 24
25CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size) 25Buffer::Buffer(const Device& device, VAddr cpu_addr, std::size_t size)
26 : VideoCommon::BufferBlock{cpu_addr, size} { 26 : VideoCommon::BufferBlock{cpu_addr, size} {
27 gl_buffer.Create(); 27 gl_buffer.Create();
28 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); 28 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
29 if (device.HasVertexBufferUnifiedMemory()) {
30 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE);
31 glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
32 }
29} 33}
30 34
31CachedBufferBlock::~CachedBufferBlock() = default; 35Buffer::~Buffer() = default;
36
37void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const {
38 glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
39 data);
40}
41
42void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
43 MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
44 glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
45 glGetNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size),
46 data);
47}
48
49void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
50 std::size_t size) const {
51 glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset),
52 static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
53}
32 54
33OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, 55OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
34 const Device& device, std::size_t stream_size) 56 const Device& device_, std::size_t stream_size)
35 : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} { 57 : GenericBufferCache{rasterizer, system,
58 std::make_unique<OGLStreamBuffer>(device_, stream_size, true)},
59 device{device_} {
36 if (!device.HasFastBufferSubData()) { 60 if (!device.HasFastBufferSubData()) {
37 return; 61 return;
38 } 62 }
39 63
40 static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); 64 static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
41 glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); 65 glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
42 for (const GLuint cbuf : cbufs) { 66 for (const GLuint cbuf : cbufs) {
43 glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); 67 glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
@@ -48,44 +72,21 @@ OGLBufferCache::~OGLBufferCache() {
48 glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); 72 glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
49} 73}
50 74
51Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { 75std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
52 return std::make_shared<CachedBufferBlock>(cpu_addr, size); 76 return std::make_shared<Buffer>(device, cpu_addr, size);
53}
54
55GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
56 return buffer->GetHandle();
57}
58
59GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) {
60 return 0;
61} 77}
62 78
63void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, 79OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) {
64 const u8* data) { 80 return {0, 0, 0};
65 glNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
66 static_cast<GLsizeiptr>(size), data);
67}
68
69void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
70 u8* data) {
71 MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
72 glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
73 glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
74 static_cast<GLsizeiptr>(size), data);
75}
76
77void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
78 std::size_t dst_offset, std::size_t size) {
79 glCopyNamedBufferSubData(src->GetHandle(), dst->GetHandle(), static_cast<GLintptr>(src_offset),
80 static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
81} 81}
82 82
83OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, 83OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
84 std::size_t size) { 84 std::size_t size) {
85 DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); 85 DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
86 const GLuint& cbuf = cbufs[cbuf_cursor++]; 86 const GLuint cbuf = cbufs[cbuf_cursor++];
87
87 glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); 88 glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
88 return {cbuf, 0}; 89 return {cbuf, 0, 0};
89} 90}
90 91
91} // namespace OpenGL 92} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index a9e86cfc7..59d95adbc 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -10,7 +10,6 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/buffer_cache/buffer_cache.h" 11#include "video_core/buffer_cache/buffer_cache.h"
12#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/rasterizer_cache.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h" 13#include "video_core/renderer_opengl/gl_resource_manager.h"
15#include "video_core/renderer_opengl/gl_stream_buffer.h" 14#include "video_core/renderer_opengl/gl_stream_buffer.h"
16 15
@@ -24,57 +23,57 @@ class Device;
24class OGLStreamBuffer; 23class OGLStreamBuffer;
25class RasterizerOpenGL; 24class RasterizerOpenGL;
26 25
27class CachedBufferBlock; 26class Buffer : public VideoCommon::BufferBlock {
27public:
28 explicit Buffer(const Device& device, VAddr cpu_addr, std::size_t size);
29 ~Buffer();
28 30
29using Buffer = std::shared_ptr<CachedBufferBlock>; 31 void Upload(std::size_t offset, std::size_t size, const u8* data) const;
30using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
31 32
32class CachedBufferBlock : public VideoCommon::BufferBlock { 33 void Download(std::size_t offset, std::size_t size, u8* data) const;
33public: 34
34 explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size); 35 void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
35 ~CachedBufferBlock(); 36 std::size_t size) const;
36 37
37 GLuint GetHandle() const { 38 GLuint Handle() const noexcept {
38 return gl_buffer.handle; 39 return gl_buffer.handle;
39 } 40 }
40 41
42 u64 Address() const noexcept {
43 return gpu_address;
44 }
45
41private: 46private:
42 OGLBuffer gl_buffer; 47 OGLBuffer gl_buffer;
48 u64 gpu_address = 0;
43}; 49};
44 50
51using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
45class OGLBufferCache final : public GenericBufferCache { 52class OGLBufferCache final : public GenericBufferCache {
46public: 53public:
47 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, 54 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
48 const Device& device, std::size_t stream_size); 55 const Device& device, std::size_t stream_size);
49 ~OGLBufferCache(); 56 ~OGLBufferCache();
50 57
51 GLuint GetEmptyBuffer(std::size_t) override; 58 BufferInfo GetEmptyBuffer(std::size_t) override;
52 59
53 void Acquire() noexcept { 60 void Acquire() noexcept {
54 cbuf_cursor = 0; 61 cbuf_cursor = 0;
55 } 62 }
56 63
57protected: 64protected:
58 Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; 65 std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
59
60 GLuint ToHandle(const Buffer& buffer) override;
61
62 void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
63 const u8* data) override;
64
65 void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
66 u8* data) override;
67
68 void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
69 std::size_t dst_offset, std::size_t size) override;
70 66
71 BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; 67 BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
72 68
73private: 69private:
70 static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
71 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
72
73 const Device& device;
74
74 std::size_t cbuf_cursor = 0; 75 std::size_t cbuf_cursor = 0;
75 std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * 76 std::array<GLuint, NUM_CBUFS> cbufs{};
76 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram>
77 cbufs;
78}; 77};
79 78
80} // namespace OpenGL 79} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 466a911db..b6b6659c1 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -6,6 +6,7 @@
6#include <array> 6#include <array>
7#include <cstddef> 7#include <cstddef>
8#include <cstring> 8#include <cstring>
9#include <limits>
9#include <optional> 10#include <optional>
10#include <vector> 11#include <vector>
11 12
@@ -26,24 +27,27 @@ constexpr u32 ReservedUniformBlocks = 1;
26 27
27constexpr u32 NumStages = 5; 28constexpr u32 NumStages = 5;
28 29
29constexpr std::array LimitUBOs = {GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, 30constexpr std::array LimitUBOs = {
30 GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, 31 GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
31 GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS}; 32 GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
33 GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS};
32 34
33constexpr std::array LimitSSBOs = { 35constexpr std::array LimitSSBOs = {
34 GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, 36 GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
35 GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, 37 GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
36 GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS}; 38 GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS};
37 39
38constexpr std::array LimitSamplers = { 40constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
39 GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, 41 GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
40 GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, 42 GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
41 GL_MAX_TEXTURE_IMAGE_UNITS}; 43 GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
44 GL_MAX_TEXTURE_IMAGE_UNITS,
45 GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS};
42 46
43constexpr std::array LimitImages = {GL_MAX_VERTEX_IMAGE_UNIFORMS, 47constexpr std::array LimitImages = {
44 GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, 48 GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
45 GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, 49 GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
46 GL_MAX_GEOMETRY_IMAGE_UNIFORMS, GL_MAX_FRAGMENT_IMAGE_UNIFORMS}; 50 GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS};
47 51
48template <typename T> 52template <typename T>
49T GetInteger(GLenum pname) { 53T GetInteger(GLenum pname) {
@@ -85,6 +89,13 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
85 return std::exchange(base, base + amount); 89 return std::exchange(base, base + amount);
86} 90}
87 91
92std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
93 std::array<u32, Tegra::Engines::MaxShaderTypes> max;
94 std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(),
95 [](GLenum pname) { return GetInteger<u32>(pname); });
96 return max;
97}
98
88std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept { 99std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
89 std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings; 100 std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
90 101
@@ -112,16 +123,24 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
112 u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS); 123 u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
113 u32 base_images = 0; 124 u32 base_images = 0;
114 125
115 // Reserve more image bindings on fragment and vertex stages. 126 // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
127 // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
128 // fragment stage, and at least 1 for the rest of the stages.
129 // So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
130
131 // Reserve at least 4 image bindings on the fragment stage.
116 bindings[4].image = 132 bindings[4].image =
117 Extract(base_images, num_images, num_images / NumStages + 2, LimitImages[4]); 133 Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]);
118 bindings[0].image = 134
119 Extract(base_images, num_images, num_images / NumStages + 1, LimitImages[0]); 135 // This is guaranteed to be at least 1.
136 const u32 total_extracted_images = num_images / (NumStages - 1);
120 137
121 // Reserve the other image bindings. 138 // Reserve the other image bindings.
122 const u32 total_extracted_images = num_images / (NumStages - 2); 139 for (std::size_t i = 0; i < NumStages; ++i) {
123 for (std::size_t i = 2; i < NumStages; ++i) {
124 const std::size_t stage = stage_swizzle[i]; 140 const std::size_t stage = stage_swizzle[i];
141 if (stage == 4) {
142 continue;
143 }
125 bindings[stage].image = 144 bindings[stage].image =
126 Extract(base_images, num_images, total_extracted_images, LimitImages[stage]); 145 Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
127 } 146 }
@@ -133,6 +152,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
133} 152}
134 153
135bool IsASTCSupported() { 154bool IsASTCSupported() {
155 static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
136 static constexpr std::array formats = { 156 static constexpr std::array formats = {
137 GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, 157 GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
138 GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, 158 GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
@@ -149,25 +169,59 @@ bool IsASTCSupported() {
149 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, 169 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
150 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, 170 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
151 }; 171 };
152 return std::find_if_not(formats.begin(), formats.end(), [](GLenum format) { 172 static constexpr std::array required_support = {
153 GLint supported; 173 GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE,
154 glGetInternalformativ(GL_TEXTURE_2D, format, GL_INTERNALFORMAT_SUPPORTED, 1, 174 GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE,
155 &supported); 175 };
156 return supported == GL_TRUE; 176
157 }) == formats.end(); 177 for (const GLenum target : targets) {
178 for (const GLenum format : formats) {
179 for (const GLenum support : required_support) {
180 GLint value;
181 glGetInternalformativ(target, format, support, 1, &value);
182 if (value != GL_FULL_SUPPORT) {
183 return false;
184 }
185 }
186 }
187 }
188 return true;
189}
190
191/// @brief Returns true when a GL_RENDERER is a Turing GPU
192/// @param renderer GL_RENDERER string
193bool IsTuring(std::string_view renderer) {
194 static constexpr std::array<std::string_view, 12> TURING_GPUS = {
195 "GTX 1650", "GTX 1660", "RTX 2060", "RTX 2070",
196 "RTX 2080", "TITAN RTX", "Quadro RTX 3000", "Quadro RTX 4000",
197 "Quadro RTX 5000", "Quadro RTX 6000", "Quadro RTX 8000", "Tesla T4",
198 };
199 return std::any_of(TURING_GPUS.begin(), TURING_GPUS.end(),
200 [renderer](std::string_view candidate) {
201 return renderer.find(candidate) != std::string_view::npos;
202 });
158} 203}
159 204
160} // Anonymous namespace 205} // Anonymous namespace
161 206
162Device::Device() : base_bindings{BuildBaseBindings()} { 207Device::Device()
208 : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
163 const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); 209 const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
164 const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER)); 210 const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
211 const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
165 const std::vector extensions = GetExtensions(); 212 const std::vector extensions = GetExtensions();
166 213
167 const bool is_nvidia = vendor == "NVIDIA Corporation"; 214 const bool is_nvidia = vendor == "NVIDIA Corporation";
168 const bool is_amd = vendor == "ATI Technologies Inc."; 215 const bool is_amd = vendor == "ATI Technologies Inc.";
169 const bool is_intel = vendor == "Intel"; 216 const bool is_turing = is_nvidia && IsTuring(renderer);
170 const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr; 217
218 bool disable_fast_buffer_sub_data = false;
219 if (is_nvidia && version == "4.6.0 NVIDIA 443.24") {
220 LOG_WARNING(
221 Render_OpenGL,
222 "Beta driver 443.24 is known to have issues. There might be performance issues.");
223 disable_fast_buffer_sub_data = true;
224 }
171 225
172 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 226 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
173 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); 227 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
@@ -178,14 +232,24 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
178 has_shader_ballot = GLAD_GL_ARB_shader_ballot; 232 has_shader_ballot = GLAD_GL_ARB_shader_ballot;
179 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; 233 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
180 has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); 234 has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
235 has_texture_shadow_lod = HasExtension(extensions, "GL_EXT_texture_shadow_lod");
181 has_astc = IsASTCSupported(); 236 has_astc = IsASTCSupported();
182 has_variable_aoffi = TestVariableAoffi(); 237 has_variable_aoffi = TestVariableAoffi();
183 has_component_indexing_bug = is_amd; 238 has_component_indexing_bug = is_amd;
184 has_precise_bug = TestPreciseBug(); 239 has_precise_bug = TestPreciseBug();
185 has_broken_compute = is_intel_proprietary; 240 has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
186 has_fast_buffer_sub_data = is_nvidia; 241
242 // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
243 // uniform buffers as "push constants"
244 has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
245
246 // Nvidia's driver on Turing GPUs randomly crashes when the buffer is made resident, or on
247 // DeleteBuffers. Disable unified memory on these devices.
248 has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory && !is_turing;
249
187 use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && 250 use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
188 GLAD_GL_NV_compute_program5; 251 GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback &&
252 GLAD_GL_NV_transform_feedback2;
189 253
190 LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); 254 LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
191 LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); 255 LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
@@ -197,17 +261,17 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
197} 261}
198 262
199Device::Device(std::nullptr_t) { 263Device::Device(std::nullptr_t) {
200 uniform_buffer_alignment = 0; 264 max_uniform_buffers.fill(std::numeric_limits<u32>::max());
265 uniform_buffer_alignment = 4;
266 shader_storage_alignment = 4;
201 max_vertex_attributes = 16; 267 max_vertex_attributes = 16;
202 max_varyings = 15; 268 max_varyings = 15;
203 has_warp_intrinsics = true; 269 has_warp_intrinsics = true;
204 has_shader_ballot = true; 270 has_shader_ballot = true;
205 has_vertex_viewport_layer = true; 271 has_vertex_viewport_layer = true;
206 has_image_load_formatted = true; 272 has_image_load_formatted = true;
273 has_texture_shadow_lod = true;
207 has_variable_aoffi = true; 274 has_variable_aoffi = true;
208 has_component_indexing_bug = false;
209 has_broken_compute = false;
210 has_precise_bug = false;
211} 275}
212 276
213bool Device::TestVariableAoffi() { 277bool Device::TestVariableAoffi() {
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index e915dbd86..e1d811966 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -24,6 +24,10 @@ public:
24 explicit Device(); 24 explicit Device();
25 explicit Device(std::nullptr_t); 25 explicit Device(std::nullptr_t);
26 26
27 u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
28 return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
29 }
30
27 const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { 31 const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
28 return base_bindings[stage_index]; 32 return base_bindings[stage_index];
29 } 33 }
@@ -64,6 +68,14 @@ public:
64 return has_image_load_formatted; 68 return has_image_load_formatted;
65 } 69 }
66 70
71 bool HasTextureShadowLod() const {
72 return has_texture_shadow_lod;
73 }
74
75 bool HasVertexBufferUnifiedMemory() const {
76 return has_vertex_buffer_unified_memory;
77 }
78
67 bool HasASTC() const { 79 bool HasASTC() const {
68 return has_astc; 80 return has_astc;
69 } 81 }
@@ -80,14 +92,14 @@ public:
80 return has_precise_bug; 92 return has_precise_bug;
81 } 93 }
82 94
83 bool HasBrokenCompute() const {
84 return has_broken_compute;
85 }
86
87 bool HasFastBufferSubData() const { 95 bool HasFastBufferSubData() const {
88 return has_fast_buffer_sub_data; 96 return has_fast_buffer_sub_data;
89 } 97 }
90 98
99 bool HasNvViewportArray2() const {
100 return has_nv_viewport_array2;
101 }
102
91 bool UseAssemblyShaders() const { 103 bool UseAssemblyShaders() const {
92 return use_assembly_shaders; 104 return use_assembly_shaders;
93 } 105 }
@@ -96,7 +108,8 @@ private:
96 static bool TestVariableAoffi(); 108 static bool TestVariableAoffi();
97 static bool TestPreciseBug(); 109 static bool TestPreciseBug();
98 110
99 std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings; 111 std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
112 std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
100 std::size_t uniform_buffer_alignment{}; 113 std::size_t uniform_buffer_alignment{};
101 std::size_t shader_storage_alignment{}; 114 std::size_t shader_storage_alignment{};
102 u32 max_vertex_attributes{}; 115 u32 max_vertex_attributes{};
@@ -105,12 +118,14 @@ private:
105 bool has_shader_ballot{}; 118 bool has_shader_ballot{};
106 bool has_vertex_viewport_layer{}; 119 bool has_vertex_viewport_layer{};
107 bool has_image_load_formatted{}; 120 bool has_image_load_formatted{};
121 bool has_texture_shadow_lod{};
122 bool has_vertex_buffer_unified_memory{};
108 bool has_astc{}; 123 bool has_astc{};
109 bool has_variable_aoffi{}; 124 bool has_variable_aoffi{};
110 bool has_component_indexing_bug{}; 125 bool has_component_indexing_bug{};
111 bool has_precise_bug{}; 126 bool has_precise_bug{};
112 bool has_broken_compute{};
113 bool has_fast_buffer_sub_data{}; 127 bool has_fast_buffer_sub_data{};
128 bool has_nv_viewport_array2{};
114 bool use_assembly_shaders{}; 129 bool use_assembly_shaders{};
115}; 130};
116 131
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 716d43e65..362457ffe 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -30,6 +30,7 @@
30#include "video_core/renderer_opengl/gl_shader_cache.h" 30#include "video_core/renderer_opengl/gl_shader_cache.h"
31#include "video_core/renderer_opengl/maxwell_to_gl.h" 31#include "video_core/renderer_opengl/maxwell_to_gl.h"
32#include "video_core/renderer_opengl/renderer_opengl.h" 32#include "video_core/renderer_opengl/renderer_opengl.h"
33#include "video_core/shader_cache.h"
33 34
34namespace OpenGL { 35namespace OpenGL {
35 36
@@ -54,15 +55,34 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
54 55
55namespace { 56namespace {
56 57
57constexpr std::size_t NumSupportedVertexAttributes = 16; 58constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
59constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
60 NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
61constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
62 NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
63
64constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
65constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
58 66
59template <typename Engine, typename Entry> 67template <typename Engine, typename Entry>
60Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, 68Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
61 ShaderType shader_type, std::size_t index = 0) { 69 ShaderType shader_type, std::size_t index = 0) {
70 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
71 if (entry.is_separated) {
72 const u32 buffer_1 = entry.buffer;
73 const u32 buffer_2 = entry.secondary_buffer;
74 const u32 offset_1 = entry.offset;
75 const u32 offset_2 = entry.secondary_offset;
76 const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
77 const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
78 return engine.GetTextureInfo(handle_1 | handle_2);
79 }
80 }
62 if (entry.is_bindless) { 81 if (entry.is_bindless) {
63 const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); 82 const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
64 return engine.GetTextureInfo(tex_handle); 83 return engine.GetTextureInfo(handle);
65 } 84 }
85
66 const auto& gpu_profile = engine.AccessGuestDriverProfile(); 86 const auto& gpu_profile = engine.AccessGuestDriverProfile();
67 const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); 87 const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
68 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { 88 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
@@ -87,6 +107,34 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
87 return buffer.size; 107 return buffer.size;
88} 108}
89 109
110/// Translates hardware transform feedback indices
111/// @param location Hardware location
112/// @return Pair of ARB_transform_feedback3 token stream first and third arguments
113/// @note Read https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt
114std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
115 const u8 index = location / 4;
116 if (index >= 8 && index <= 39) {
117 return {GL_GENERIC_ATTRIB_NV, index - 8};
118 }
119 if (index >= 48 && index <= 55) {
120 return {GL_TEXTURE_COORD_NV, index - 48};
121 }
122 switch (index) {
123 case 7:
124 return {GL_POSITION, 0};
125 case 40:
126 return {GL_PRIMARY_COLOR_NV, 0};
127 case 41:
128 return {GL_SECONDARY_COLOR_NV, 0};
129 case 42:
130 return {GL_BACK_PRIMARY_COLOR_NV, 0};
131 case 43:
132 return {GL_BACK_SECONDARY_COLOR_NV, 0};
133 }
134 UNIMPLEMENTED_MSG("index={}", static_cast<int>(index));
135 return {GL_POSITION, 0};
136}
137
90void oglEnable(GLenum cap, bool state) { 138void oglEnable(GLenum cap, bool state) {
91 (state ? glEnable : glDisable)(cap); 139 (state ? glEnable : glDisable)(cap);
92} 140}
@@ -104,6 +152,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
104 screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { 152 screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
105 CheckExtensions(); 153 CheckExtensions();
106 154
155 unified_uniform_buffer.Create();
156 glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
157
107 if (device.UseAssemblyShaders()) { 158 if (device.UseAssemblyShaders()) {
108 glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); 159 glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
109 for (const GLuint cbuf : staging_cbufs) { 160 for (const GLuint cbuf : staging_cbufs) {
@@ -143,7 +194,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
143 // avoid OpenGL errors. 194 // avoid OpenGL errors.
144 // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't 195 // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
145 // assume every shader uses them all. 196 // assume every shader uses them all.
146 for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { 197 for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
147 if (!flags[Dirty::VertexFormat0 + index]) { 198 if (!flags[Dirty::VertexFormat0 + index]) {
148 continue; 199 continue;
149 } 200 }
@@ -181,9 +232,11 @@ void RasterizerOpenGL::SetupVertexBuffer() {
181 232
182 MICROPROFILE_SCOPE(OpenGL_VB); 233 MICROPROFILE_SCOPE(OpenGL_VB);
183 234
235 const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
236
184 // Upload all guest vertex arrays sequentially to our buffer 237 // Upload all guest vertex arrays sequentially to our buffer
185 const auto& regs = gpu.regs; 238 const auto& regs = gpu.regs;
186 for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { 239 for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
187 if (!flags[Dirty::VertexBuffer0 + index]) { 240 if (!flags[Dirty::VertexBuffer0 + index]) {
188 continue; 241 continue;
189 } 242 }
@@ -196,16 +249,25 @@ void RasterizerOpenGL::SetupVertexBuffer() {
196 249
197 const GPUVAddr start = vertex_array.StartAddress(); 250 const GPUVAddr start = vertex_array.StartAddress();
198 const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); 251 const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
199
200 ASSERT(end >= start); 252 ASSERT(end >= start);
253
254 const GLuint gl_index = static_cast<GLuint>(index);
201 const u64 size = end - start; 255 const u64 size = end - start;
202 if (size == 0) { 256 if (size == 0) {
203 glBindVertexBuffer(static_cast<GLuint>(index), 0, 0, vertex_array.stride); 257 glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
258 if (use_unified_memory) {
259 glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0);
260 }
204 continue; 261 continue;
205 } 262 }
206 const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); 263 const auto info = buffer_cache.UploadMemory(start, size);
207 glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset, 264 if (use_unified_memory) {
208 vertex_array.stride); 265 glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride);
266 glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index,
267 info.address + info.offset, size);
268 } else {
269 glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride);
270 }
209 } 271 }
210} 272}
211 273
@@ -218,7 +280,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
218 flags[Dirty::VertexInstances] = false; 280 flags[Dirty::VertexInstances] = false;
219 281
220 const auto& regs = gpu.regs; 282 const auto& regs = gpu.regs;
221 for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { 283 for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
222 if (!flags[Dirty::VertexInstance0 + index]) { 284 if (!flags[Dirty::VertexInstance0 + index]) {
223 continue; 285 continue;
224 } 286 }
@@ -235,9 +297,9 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
235 MICROPROFILE_SCOPE(OpenGL_Index); 297 MICROPROFILE_SCOPE(OpenGL_Index);
236 const auto& regs = system.GPU().Maxwell3D().regs; 298 const auto& regs = system.GPU().Maxwell3D().regs;
237 const std::size_t size = CalculateIndexBufferSize(); 299 const std::size_t size = CalculateIndexBufferSize();
238 const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); 300 const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
239 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer); 301 glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
240 return offset; 302 return info.offset;
241} 303}
242 304
243void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { 305void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
@@ -273,7 +335,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
273 continue; 335 continue;
274 } 336 }
275 337
276 Shader shader{shader_cache.GetStageProgram(program)}; 338 Shader* const shader = shader_cache.GetStageProgram(program);
277 339
278 if (device.UseAssemblyShaders()) { 340 if (device.UseAssemblyShaders()) {
279 // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this 341 // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
@@ -567,7 +629,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
567 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); 629 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
568 630
569 // Prepare the vertex array. 631 // Prepare the vertex array.
570 buffer_cache.Map(buffer_size); 632 const bool invalidated = buffer_cache.Map(buffer_size);
633
634 if (invalidated) {
635 // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
636 auto& dirty = gpu.dirty.flags;
637 dirty[Dirty::VertexBuffers] = true;
638 for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
639 dirty[index] = true;
640 }
641 }
571 642
572 // Prepare vertex array format. 643 // Prepare vertex array format.
573 SetupVertexFormat(); 644 SetupVertexFormat();
@@ -584,9 +655,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
584 if (!device.UseAssemblyShaders()) { 655 if (!device.UseAssemblyShaders()) {
585 MaxwellUniformData ubo; 656 MaxwellUniformData ubo;
586 ubo.SetFromRegs(gpu); 657 ubo.SetFromRegs(gpu);
587 const auto [buffer, offset] = 658 const auto info =
588 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); 659 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
589 glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, 660 glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
590 static_cast<GLsizeiptr>(sizeof(ubo))); 661 static_cast<GLsizeiptr>(sizeof(ubo)));
591 } 662 }
592 663
@@ -655,10 +726,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
655} 726}
656 727
657void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 728void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
658 if (device.HasBrokenCompute()) {
659 return;
660 }
661
662 buffer_cache.Acquire(); 729 buffer_cache.Acquire();
663 current_cbuf = 0; 730 current_cbuf = 0;
664 731
@@ -837,7 +904,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
837 return true; 904 return true;
838} 905}
839 906
840void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { 907void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
841 static constexpr std::array PARAMETER_LUT = { 908 static constexpr std::array PARAMETER_LUT = {
842 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, 909 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
843 GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, 910 GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
@@ -846,41 +913,62 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
846 MICROPROFILE_SCOPE(OpenGL_UBO); 913 MICROPROFILE_SCOPE(OpenGL_UBO);
847 const auto& stages = system.GPU().Maxwell3D().state.shader_stages; 914 const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
848 const auto& shader_stage = stages[stage_index]; 915 const auto& shader_stage = stages[stage_index];
916 const auto& entries = shader->GetEntries();
917 const bool use_unified = entries.use_unified_uniforms;
918 const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
849 919
850 u32 binding = 920 const auto base_bindings = device.GetBaseBindings(stage_index);
851 device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).uniform_buffer; 921 u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
852 for (const auto& entry : shader->GetEntries().const_buffers) { 922 for (const auto& entry : entries.const_buffers) {
853 const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; 923 const u32 index = entry.GetIndex();
854 SetupConstBuffer(PARAMETER_LUT[stage_index], binding++, buffer, entry); 924 const auto& buffer = shader_stage.const_buffers[index];
925 SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
926 base_unified_offset + index * Maxwell::MaxConstBufferSize);
927 ++binding;
928 }
929 if (use_unified) {
930 const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
931 entries.global_memory_entries.size());
932 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
933 base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
855 } 934 }
856} 935}
857 936
858void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { 937void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
859 MICROPROFILE_SCOPE(OpenGL_UBO); 938 MICROPROFILE_SCOPE(OpenGL_UBO);
860 const auto& launch_desc = system.GPU().KeplerCompute().launch_description; 939 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
940 const auto& entries = kernel->GetEntries();
941 const bool use_unified = entries.use_unified_uniforms;
861 942
862 u32 binding = 0; 943 u32 binding = 0;
863 for (const auto& entry : kernel->GetEntries().const_buffers) { 944 for (const auto& entry : entries.const_buffers) {
864 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; 945 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
865 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); 946 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
866 Tegra::Engines::ConstBufferInfo buffer; 947 Tegra::Engines::ConstBufferInfo buffer;
867 buffer.address = config.Address(); 948 buffer.address = config.Address();
868 buffer.size = config.size; 949 buffer.size = config.size;
869 buffer.enabled = mask[entry.GetIndex()]; 950 buffer.enabled = mask[entry.GetIndex()];
870 SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding++, buffer, entry); 951 SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
952 use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
953 ++binding;
954 }
955 if (use_unified) {
956 const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
957 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
958 NUM_CONST_BUFFERS_BYTES_PER_STAGE);
871 } 959 }
872} 960}
873 961
874void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, 962void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
875 const Tegra::Engines::ConstBufferInfo& buffer, 963 const Tegra::Engines::ConstBufferInfo& buffer,
876 const ConstBufferEntry& entry) { 964 const ConstBufferEntry& entry, bool use_unified,
965 std::size_t unified_offset) {
877 if (!buffer.enabled) { 966 if (!buffer.enabled) {
878 // Set values to zero to unbind buffers 967 // Set values to zero to unbind buffers
879 if (device.UseAssemblyShaders()) { 968 if (device.UseAssemblyShaders()) {
880 glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); 969 glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
881 } else { 970 } else {
882 glBindBufferRange(GL_UNIFORM_BUFFER, binding, 971 glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float));
883 buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
884 } 972 }
885 return; 973 return;
886 } 974 }
@@ -889,23 +977,33 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
889 // UBO alignment requirements. 977 // UBO alignment requirements.
890 const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); 978 const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
891 979
892 const auto alignment = device.GetUniformBufferAlignment(); 980 const bool fast_upload = !use_unified && device.HasFastBufferSubData();
893 auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, 981
894 device.HasFastBufferSubData()); 982 const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
895 if (!device.UseAssemblyShaders()) { 983 const GPUVAddr gpu_addr = buffer.address;
896 glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); 984 auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
985
986 if (device.UseAssemblyShaders()) {
987 UNIMPLEMENTED_IF(use_unified);
988 if (info.offset != 0) {
989 const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
990 glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size);
991 info.handle = staging_cbuf;
992 info.offset = 0;
993 }
994 glBindBufferRangeNV(stage, binding, info.handle, info.offset, size);
897 return; 995 return;
898 } 996 }
899 if (offset != 0) { 997
900 const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; 998 if (use_unified) {
901 glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); 999 glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset,
902 cbuf = staging_cbuf; 1000 unified_offset, size);
903 offset = 0; 1001 } else {
1002 glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size);
904 } 1003 }
905 glBindBufferRangeNV(stage, binding, cbuf, offset, size);
906} 1004}
907 1005
908void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { 1006void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
909 auto& gpu{system.GPU()}; 1007 auto& gpu{system.GPU()};
910 auto& memory_manager{gpu.MemoryManager()}; 1008 auto& memory_manager{gpu.MemoryManager()};
911 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; 1009 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
@@ -920,7 +1018,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
920 } 1018 }
921} 1019}
922 1020
923void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { 1021void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
924 auto& gpu{system.GPU()}; 1022 auto& gpu{system.GPU()};
925 auto& memory_manager{gpu.MemoryManager()}; 1023 auto& memory_manager{gpu.MemoryManager()};
926 const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; 1024 const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
@@ -937,13 +1035,12 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
937void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, 1035void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
938 GPUVAddr gpu_addr, std::size_t size) { 1036 GPUVAddr gpu_addr, std::size_t size) {
939 const auto alignment{device.GetShaderStorageBufferAlignment()}; 1037 const auto alignment{device.GetShaderStorageBufferAlignment()};
940 const auto [ssbo, buffer_offset] = 1038 const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
941 buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); 1039 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
942 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset,
943 static_cast<GLsizeiptr>(size)); 1040 static_cast<GLsizeiptr>(size));
944} 1041}
945 1042
946void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) { 1043void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
947 MICROPROFILE_SCOPE(OpenGL_Texture); 1044 MICROPROFILE_SCOPE(OpenGL_Texture);
948 const auto& maxwell3d = system.GPU().Maxwell3D(); 1045 const auto& maxwell3d = system.GPU().Maxwell3D();
949 u32 binding = device.GetBaseBindings(stage_index).sampler; 1046 u32 binding = device.GetBaseBindings(stage_index).sampler;
@@ -956,7 +1053,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
956 } 1053 }
957} 1054}
958 1055
959void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { 1056void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
960 MICROPROFILE_SCOPE(OpenGL_Texture); 1057 MICROPROFILE_SCOPE(OpenGL_Texture);
961 const auto& compute = system.GPU().KeplerCompute(); 1058 const auto& compute = system.GPU().KeplerCompute();
962 u32 binding = 0; 1059 u32 binding = 0;
@@ -985,7 +1082,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
985 } 1082 }
986} 1083}
987 1084
988void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { 1085void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
989 const auto& maxwell3d = system.GPU().Maxwell3D(); 1086 const auto& maxwell3d = system.GPU().Maxwell3D();
990 u32 binding = device.GetBaseBindings(stage_index).image; 1087 u32 binding = device.GetBaseBindings(stage_index).image;
991 for (const auto& entry : shader->GetEntries().images) { 1088 for (const auto& entry : shader->GetEntries().images) {
@@ -995,7 +1092,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
995 } 1092 }
996} 1093}
997 1094
998void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { 1095void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
999 const auto& compute = system.GPU().KeplerCompute(); 1096 const auto& compute = system.GPU().KeplerCompute();
1000 u32 binding = 0; 1097 u32 binding = 0;
1001 for (const auto& entry : shader->GetEntries().images) { 1098 for (const auto& entry : shader->GetEntries().images) {
@@ -1024,6 +1121,26 @@ void RasterizerOpenGL::SyncViewport() {
1024 const auto& regs = gpu.regs; 1121 const auto& regs = gpu.regs;
1025 1122
1026 const bool dirty_viewport = flags[Dirty::Viewports]; 1123 const bool dirty_viewport = flags[Dirty::Viewports];
1124 const bool dirty_clip_control = flags[Dirty::ClipControl];
1125
1126 if (dirty_clip_control || flags[Dirty::FrontFace]) {
1127 flags[Dirty::FrontFace] = false;
1128
1129 GLenum mode = MaxwellToGL::FrontFace(regs.front_face);
1130 if (regs.screen_y_control.triangle_rast_flip != 0 &&
1131 regs.viewport_transform[0].scale_y < 0.0f) {
1132 switch (mode) {
1133 case GL_CW:
1134 mode = GL_CCW;
1135 break;
1136 case GL_CCW:
1137 mode = GL_CW;
1138 break;
1139 }
1140 }
1141 glFrontFace(mode);
1142 }
1143
1027 if (dirty_viewport || flags[Dirty::ClipControl]) { 1144 if (dirty_viewport || flags[Dirty::ClipControl]) {
1028 flags[Dirty::ClipControl] = false; 1145 flags[Dirty::ClipControl] = false;
1029 1146
@@ -1121,11 +1238,6 @@ void RasterizerOpenGL::SyncCullMode() {
1121 glDisable(GL_CULL_FACE); 1238 glDisable(GL_CULL_FACE);
1122 } 1239 }
1123 } 1240 }
1124
1125 if (flags[Dirty::FrontFace]) {
1126 flags[Dirty::FrontFace] = false;
1127 glFrontFace(MaxwellToGL::FrontFace(regs.front_face));
1128 }
1129} 1241}
1130 1242
1131void RasterizerOpenGL::SyncPrimitiveRestart() { 1243void RasterizerOpenGL::SyncPrimitiveRestart() {
@@ -1496,12 +1608,70 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
1496 oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); 1608 oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
1497} 1609}
1498 1610
1611void RasterizerOpenGL::SyncTransformFeedback() {
1612 // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
1613 // when this is required.
1614 const auto& regs = system.GPU().Maxwell3D().regs;
1615
1616 static constexpr std::size_t STRIDE = 3;
1617 std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
1618 std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams;
1619
1620 GLint* cursor = attribs.data();
1621 GLint* current_stream = streams.data();
1622
1623 for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) {
1624 const auto& layout = regs.tfb_layouts[feedback];
1625 UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding");
1626 if (layout.varying_count == 0) {
1627 continue;
1628 }
1629
1630 *current_stream = static_cast<GLint>(feedback);
1631 if (current_stream != streams.data()) {
1632 // When stepping one stream, push the expected token
1633 cursor[0] = GL_NEXT_BUFFER_NV;
1634 cursor[1] = 0;
1635 cursor[2] = 0;
1636 cursor += STRIDE;
1637 }
1638 ++current_stream;
1639
1640 const auto& locations = regs.tfb_varying_locs[feedback];
1641 std::optional<u8> current_index;
1642 for (u32 offset = 0; offset < layout.varying_count; ++offset) {
1643 const u8 location = locations[offset];
1644 const u8 index = location / 4;
1645
1646 if (current_index == index) {
1647 // Increase number of components of the previous attachment
1648 ++cursor[-2];
1649 continue;
1650 }
1651 current_index = index;
1652
1653 std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location);
1654 cursor[1] = 1;
1655 cursor += STRIDE;
1656 }
1657 }
1658
1659 const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE);
1660 const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data());
1661 glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(),
1662 GL_INTERLEAVED_ATTRIBS);
1663}
1664
1499void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { 1665void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
1500 const auto& regs = system.GPU().Maxwell3D().regs; 1666 const auto& regs = system.GPU().Maxwell3D().regs;
1501 if (regs.tfb_enabled == 0) { 1667 if (regs.tfb_enabled == 0) {
1502 return; 1668 return;
1503 } 1669 }
1504 1670
1671 if (device.UseAssemblyShaders()) {
1672 SyncTransformFeedback();
1673 }
1674
1505 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || 1675 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
1506 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || 1676 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
1507 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); 1677 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
@@ -1528,6 +1698,10 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
1528 static_cast<GLsizeiptr>(size)); 1698 static_cast<GLsizeiptr>(size));
1529 } 1699 }
1530 1700
1701 // We may have to call BeginTransformFeedbackNV here since they seem to call different
1702 // implementations on Nvidia's driver (the pointer is different) but we are using
1703 // ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB
1704 // extension doesn't define BeginTransformFeedback (without NV) interactions. It just works.
1531 glBeginTransformFeedback(GL_POINTS); 1705 glBeginTransformFeedback(GL_POINTS);
1532} 1706}
1533 1707
@@ -1549,8 +1723,9 @@ void RasterizerOpenGL::EndTransformFeedback() {
1549 const GLuint handle = transform_feedback_buffers[index].handle; 1723 const GLuint handle = transform_feedback_buffers[index].handle;
1550 const GPUVAddr gpu_addr = binding.Address(); 1724 const GPUVAddr gpu_addr = binding.Address();
1551 const std::size_t size = binding.buffer_size; 1725 const std::size_t size = binding.buffer_size;
1552 const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); 1726 const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
1553 glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); 1727 glCopyNamedBufferSubData(handle, info.handle, 0, info.offset,
1728 static_cast<GLsizeiptr>(size));
1554 } 1729 }
1555} 1730}
1556 1731
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 87f7fe159..4f082592f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -19,7 +19,6 @@
19#include "video_core/engines/const_buffer_info.h" 19#include "video_core/engines/const_buffer_info.h"
20#include "video_core/engines/maxwell_3d.h" 20#include "video_core/engines/maxwell_3d.h"
21#include "video_core/rasterizer_accelerated.h" 21#include "video_core/rasterizer_accelerated.h"
22#include "video_core/rasterizer_cache.h"
23#include "video_core/rasterizer_interface.h" 22#include "video_core/rasterizer_interface.h"
24#include "video_core/renderer_opengl/gl_buffer_cache.h" 23#include "video_core/renderer_opengl/gl_buffer_cache.h"
25#include "video_core/renderer_opengl/gl_device.h" 24#include "video_core/renderer_opengl/gl_device.h"
@@ -100,40 +99,41 @@ private:
100 void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil); 99 void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
101 100
102 /// Configures the current constbuffers to use for the draw command. 101 /// Configures the current constbuffers to use for the draw command.
103 void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader); 102 void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
104 103
105 /// Configures the current constbuffers to use for the kernel invocation. 104 /// Configures the current constbuffers to use for the kernel invocation.
106 void SetupComputeConstBuffers(const Shader& kernel); 105 void SetupComputeConstBuffers(Shader* kernel);
107 106
108 /// Configures a constant buffer. 107 /// Configures a constant buffer.
109 void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, 108 void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
110 const ConstBufferEntry& entry); 109 const ConstBufferEntry& entry, bool use_unified,
110 std::size_t unified_offset);
111 111
112 /// Configures the current global memory entries to use for the draw command. 112 /// Configures the current global memory entries to use for the draw command.
113 void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); 113 void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
114 114
115 /// Configures the current global memory entries to use for the kernel invocation. 115 /// Configures the current global memory entries to use for the kernel invocation.
116 void SetupComputeGlobalMemory(const Shader& kernel); 116 void SetupComputeGlobalMemory(Shader* kernel);
117 117
118 /// Configures a constant buffer. 118 /// Configures a constant buffer.
119 void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, 119 void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
120 std::size_t size); 120 std::size_t size);
121 121
122 /// Configures the current textures to use for the draw command. 122 /// Configures the current textures to use for the draw command.
123 void SetupDrawTextures(std::size_t stage_index, const Shader& shader); 123 void SetupDrawTextures(std::size_t stage_index, Shader* shader);
124 124
125 /// Configures the textures used in a compute shader. 125 /// Configures the textures used in a compute shader.
126 void SetupComputeTextures(const Shader& kernel); 126 void SetupComputeTextures(Shader* kernel);
127 127
128 /// Configures a texture. 128 /// Configures a texture.
129 void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, 129 void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
130 const SamplerEntry& entry); 130 const SamplerEntry& entry);
131 131
132 /// Configures images in a graphics shader. 132 /// Configures images in a graphics shader.
133 void SetupDrawImages(std::size_t stage_index, const Shader& shader); 133 void SetupDrawImages(std::size_t stage_index, Shader* shader);
134 134
135 /// Configures images in a compute shader. 135 /// Configures images in a compute shader.
136 void SetupComputeImages(const Shader& shader); 136 void SetupComputeImages(Shader* shader);
137 137
138 /// Configures an image. 138 /// Configures an image.
139 void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); 139 void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
@@ -201,6 +201,10 @@ private:
201 /// Syncs the framebuffer sRGB state to match the guest state 201 /// Syncs the framebuffer sRGB state to match the guest state
202 void SyncFramebufferSRGB(); 202 void SyncFramebufferSRGB();
203 203
204 /// Syncs transform feedback state to match guest state
205 /// @note Only valid on assembly shaders
206 void SyncTransformFeedback();
207
204 /// Begin a transform feedback 208 /// Begin a transform feedback
205 void BeginTransformFeedback(GLenum primitive_mode); 209 void BeginTransformFeedback(GLenum primitive_mode);
206 210
@@ -253,6 +257,7 @@ private:
253 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; 257 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
254 std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{}; 258 std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
255 std::size_t current_cbuf = 0; 259 std::size_t current_cbuf = 0;
260 OGLBuffer unified_uniform_buffer;
256 261
257 /// Number of commands queued to the OpenGL driver. Reseted on flush. 262 /// Number of commands queued to the OpenGL driver. Reseted on flush.
258 std::size_t num_queued_commands = 0; 263 std::size_t num_queued_commands = 0;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 4cd0f36cf..c6a3bf3a1 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -20,6 +20,7 @@
20#include "video_core/engines/maxwell_3d.h" 20#include "video_core/engines/maxwell_3d.h"
21#include "video_core/engines/shader_type.h" 21#include "video_core/engines/shader_type.h"
22#include "video_core/memory_manager.h" 22#include "video_core/memory_manager.h"
23#include "video_core/renderer_opengl/gl_arb_decompiler.h"
23#include "video_core/renderer_opengl/gl_rasterizer.h" 24#include "video_core/renderer_opengl/gl_rasterizer.h"
24#include "video_core/renderer_opengl/gl_shader_cache.h" 25#include "video_core/renderer_opengl/gl_shader_cache.h"
25#include "video_core/renderer_opengl/gl_shader_decompiler.h" 26#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -29,6 +30,7 @@
29#include "video_core/shader/memory_util.h" 30#include "video_core/shader/memory_util.h"
30#include "video_core/shader/registry.h" 31#include "video_core/shader/registry.h"
31#include "video_core/shader/shader_ir.h" 32#include "video_core/shader/shader_ir.h"
33#include "video_core/shader_cache.h"
32 34
33namespace OpenGL { 35namespace OpenGL {
34 36
@@ -147,7 +149,8 @@ ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 u
147 auto program = std::make_shared<ProgramHandle>(); 149 auto program = std::make_shared<ProgramHandle>();
148 150
149 if (device.UseAssemblyShaders()) { 151 if (device.UseAssemblyShaders()) {
150 const std::string arb = "Not implemented"; 152 const std::string arb =
153 DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
151 154
152 GLuint& arb_prog = program->assembly_program.handle; 155 GLuint& arb_prog = program->assembly_program.handle;
153 156
@@ -194,12 +197,9 @@ std::unordered_set<GLenum> GetSupportedFormats() {
194 197
195} // Anonymous namespace 198} // Anonymous namespace
196 199
197CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, 200Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_,
198 std::shared_ptr<VideoCommon::Shader::Registry> registry, 201 ProgramSharedPtr program_)
199 ShaderEntries entries, ProgramSharedPtr program_) 202 : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} {
200 : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
201 size_in_bytes{size_in_bytes}, program{std::move(program_)} {
202 // Assign either the assembly program or source program. We can't have both.
203 handle = program->assembly_program.handle; 203 handle = program->assembly_program.handle;
204 if (handle == 0) { 204 if (handle == 0) {
205 handle = program->source_program.handle; 205 handle = program->source_program.handle;
@@ -207,16 +207,16 @@ CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
207 ASSERT(handle != 0); 207 ASSERT(handle != 0);
208} 208}
209 209
210CachedShader::~CachedShader() = default; 210Shader::~Shader() = default;
211 211
212GLuint CachedShader::GetHandle() const { 212GLuint Shader::GetHandle() const {
213 DEBUG_ASSERT(registry->IsConsistent()); 213 DEBUG_ASSERT(registry->IsConsistent());
214 return handle; 214 return handle;
215} 215}
216 216
217Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, 217std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params,
218 Maxwell::ShaderProgram program_type, ProgramCode code, 218 Maxwell::ShaderProgram program_type,
219 ProgramCode code_b) { 219 ProgramCode code, ProgramCode code_b) {
220 const auto shader_type = GetShaderType(program_type); 220 const auto shader_type = GetShaderType(program_type);
221 const std::size_t size_in_bytes = code.size() * sizeof(u64); 221 const std::size_t size_in_bytes = code.size() * sizeof(u64);
222 222
@@ -241,11 +241,12 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
241 entry.bindless_samplers = registry->GetBindlessSamplers(); 241 entry.bindless_samplers = registry->GetBindlessSamplers();
242 params.disk_cache.SaveEntry(std::move(entry)); 242 params.disk_cache.SaveEntry(std::move(entry));
243 243
244 return std::shared_ptr<CachedShader>(new CachedShader( 244 return std::unique_ptr<Shader>(new Shader(
245 params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); 245 std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program)));
246} 246}
247 247
248Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { 248std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
249 ProgramCode code) {
249 const std::size_t size_in_bytes = code.size() * sizeof(u64); 250 const std::size_t size_in_bytes = code.size() * sizeof(u64);
250 251
251 auto& engine = params.system.GPU().KeplerCompute(); 252 auto& engine = params.system.GPU().KeplerCompute();
@@ -265,22 +266,23 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
265 entry.bindless_samplers = registry->GetBindlessSamplers(); 266 entry.bindless_samplers = registry->GetBindlessSamplers();
266 params.disk_cache.SaveEntry(std::move(entry)); 267 params.disk_cache.SaveEntry(std::move(entry));
267 268
268 return std::shared_ptr<CachedShader>(new CachedShader( 269 return std::unique_ptr<Shader>(new Shader(std::move(registry),
269 params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); 270 MakeEntries(params.device, ir, ShaderType::Compute),
271 std::move(program)));
270} 272}
271 273
272Shader CachedShader::CreateFromCache(const ShaderParameters& params, 274std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
273 const PrecompiledShader& precompiled_shader, 275 const PrecompiledShader& precompiled_shader) {
274 std::size_t size_in_bytes) { 276 return std::unique_ptr<Shader>(new Shader(
275 return std::shared_ptr<CachedShader>( 277 precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
276 new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
277 precompiled_shader.entries, precompiled_shader.program));
278} 278}
279 279
280ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, 280ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
281 Core::Frontend::EmuWindow& emu_window, const Device& device) 281 Core::Frontend::EmuWindow& emu_window, const Device& device)
282 : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device}, 282 : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system},
283 disk_cache{system} {} 283 emu_window{emu_window}, device{device}, disk_cache{system} {}
284
285ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
284 286
285void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, 287void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
286 const VideoCore::DiskResourceLoadCallback& callback) { 288 const VideoCore::DiskResourceLoadCallback& callback) {
@@ -348,7 +350,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
348 PrecompiledShader shader; 350 PrecompiledShader shader;
349 shader.program = std::move(program); 351 shader.program = std::move(program);
350 shader.registry = std::move(registry); 352 shader.registry = std::move(registry);
351 shader.entries = MakeEntries(ir); 353 shader.entries = MakeEntries(device, ir, entry.type);
352 354
353 std::scoped_lock lock{mutex}; 355 std::scoped_lock lock{mutex};
354 if (callback) { 356 if (callback) {
@@ -434,7 +436,7 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
434 return program; 436 return program;
435} 437}
436 438
437Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { 439Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
438 if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { 440 if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
439 return last_shaders[static_cast<std::size_t>(program)]; 441 return last_shaders[static_cast<std::size_t>(program)];
440 } 442 }
@@ -444,8 +446,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
444 446
445 // Look up shader in the cache based on address 447 // Look up shader in the cache based on address
446 const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; 448 const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
447 Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader}; 449 if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
448 if (shader) {
449 return last_shaders[static_cast<std::size_t>(program)] = shader; 450 return last_shaders[static_cast<std::size_t>(program)] = shader;
450 } 451 }
451 452
@@ -459,62 +460,64 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
459 const u8* host_ptr_b = memory_manager.GetPointer(address_b); 460 const u8* host_ptr_b = memory_manager.GetPointer(address_b);
460 code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false); 461 code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false);
461 } 462 }
463 const std::size_t code_size = code.size() * sizeof(u64);
462 464
463 const auto unique_identifier = GetUniqueIdentifier( 465 const u64 unique_identifier = GetUniqueIdentifier(
464 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); 466 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
465 467
466 const ShaderParameters params{system, disk_cache, device, 468 const ShaderParameters params{system, disk_cache, device,
467 *cpu_addr, host_ptr, unique_identifier}; 469 *cpu_addr, host_ptr, unique_identifier};
468 470
471 std::unique_ptr<Shader> shader;
469 const auto found = runtime_cache.find(unique_identifier); 472 const auto found = runtime_cache.find(unique_identifier);
470 if (found == runtime_cache.end()) { 473 if (found == runtime_cache.end()) {
471 shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), 474 shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b));
472 std::move(code_b));
473 } else { 475 } else {
474 const std::size_t size_in_bytes = code.size() * sizeof(u64); 476 shader = Shader::CreateFromCache(params, found->second);
475 shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
476 } 477 }
477 478
479 Shader* const result = shader.get();
478 if (cpu_addr) { 480 if (cpu_addr) {
479 Register(shader); 481 Register(std::move(shader), *cpu_addr, code_size);
480 } else { 482 } else {
481 null_shader = shader; 483 null_shader = std::move(shader);
482 } 484 }
483 485
484 return last_shaders[static_cast<std::size_t>(program)] = shader; 486 return last_shaders[static_cast<std::size_t>(program)] = result;
485} 487}
486 488
487Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { 489Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
488 auto& memory_manager{system.GPU().MemoryManager()}; 490 auto& memory_manager{system.GPU().MemoryManager()};
489 const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; 491 const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
490 492
491 auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel; 493 if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
492 if (kernel) {
493 return kernel; 494 return kernel;
494 } 495 }
495 496
496 const auto host_ptr{memory_manager.GetPointer(code_addr)}; 497 const auto host_ptr{memory_manager.GetPointer(code_addr)};
497 // No kernel found, create a new one 498 // No kernel found, create a new one
498 auto code{GetShaderCode(memory_manager, code_addr, host_ptr, true)}; 499 ProgramCode code{GetShaderCode(memory_manager, code_addr, host_ptr, true)};
499 const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; 500 const std::size_t code_size{code.size() * sizeof(u64)};
501 const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
500 502
501 const ShaderParameters params{system, disk_cache, device, 503 const ShaderParameters params{system, disk_cache, device,
502 *cpu_addr, host_ptr, unique_identifier}; 504 *cpu_addr, host_ptr, unique_identifier};
503 505
506 std::unique_ptr<Shader> kernel;
504 const auto found = runtime_cache.find(unique_identifier); 507 const auto found = runtime_cache.find(unique_identifier);
505 if (found == runtime_cache.end()) { 508 if (found == runtime_cache.end()) {
506 kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); 509 kernel = Shader::CreateKernelFromMemory(params, std::move(code));
507 } else { 510 } else {
508 const std::size_t size_in_bytes = code.size() * sizeof(u64); 511 kernel = Shader::CreateFromCache(params, found->second);
509 kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
510 } 512 }
511 513
514 Shader* const result = kernel.get();
512 if (cpu_addr) { 515 if (cpu_addr) {
513 Register(kernel); 516 Register(std::move(kernel), *cpu_addr, code_size);
514 } else { 517 } else {
515 null_kernel = kernel; 518 null_kernel = std::move(kernel);
516 } 519 }
517 return kernel; 520 return result;
518} 521}
519 522
520} // namespace OpenGL 523} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index b2ae8d7f9..994aaeaf2 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -18,12 +18,12 @@
18 18
19#include "common/common_types.h" 19#include "common/common_types.h"
20#include "video_core/engines/shader_type.h" 20#include "video_core/engines/shader_type.h"
21#include "video_core/rasterizer_cache.h"
22#include "video_core/renderer_opengl/gl_resource_manager.h" 21#include "video_core/renderer_opengl/gl_resource_manager.h"
23#include "video_core/renderer_opengl/gl_shader_decompiler.h" 22#include "video_core/renderer_opengl/gl_shader_decompiler.h"
24#include "video_core/renderer_opengl/gl_shader_disk_cache.h" 23#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
25#include "video_core/shader/registry.h" 24#include "video_core/shader/registry.h"
26#include "video_core/shader/shader_ir.h" 25#include "video_core/shader/shader_ir.h"
26#include "video_core/shader_cache.h"
27 27
28namespace Core { 28namespace Core {
29class System; 29class System;
@@ -35,12 +35,9 @@ class EmuWindow;
35 35
36namespace OpenGL { 36namespace OpenGL {
37 37
38class CachedShader;
39class Device; 38class Device;
40class RasterizerOpenGL; 39class RasterizerOpenGL;
41struct UnspecializedShader;
42 40
43using Shader = std::shared_ptr<CachedShader>;
44using Maxwell = Tegra::Engines::Maxwell3D::Regs; 41using Maxwell = Tegra::Engines::Maxwell3D::Regs;
45 42
46struct ProgramHandle { 43struct ProgramHandle {
@@ -64,62 +61,53 @@ struct ShaderParameters {
64 u64 unique_identifier; 61 u64 unique_identifier;
65}; 62};
66 63
67class CachedShader final : public RasterizerCacheObject { 64class Shader final {
68public: 65public:
69 ~CachedShader(); 66 ~Shader();
70 67
71 /// Gets the GL program handle for the shader 68 /// Gets the GL program handle for the shader
72 GLuint GetHandle() const; 69 GLuint GetHandle() const;
73 70
74 /// Returns the size in bytes of the shader
75 std::size_t GetSizeInBytes() const override {
76 return size_in_bytes;
77 }
78
79 /// Gets the shader entries for the shader 71 /// Gets the shader entries for the shader
80 const ShaderEntries& GetEntries() const { 72 const ShaderEntries& GetEntries() const {
81 return entries; 73 return entries;
82 } 74 }
83 75
84 static Shader CreateStageFromMemory(const ShaderParameters& params, 76 static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params,
85 Maxwell::ShaderProgram program_type, 77 Maxwell::ShaderProgram program_type,
86 ProgramCode program_code, ProgramCode program_code_b); 78 ProgramCode program_code,
87 static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); 79 ProgramCode program_code_b);
80 static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
81 ProgramCode code);
88 82
89 static Shader CreateFromCache(const ShaderParameters& params, 83 static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
90 const PrecompiledShader& precompiled_shader, 84 const PrecompiledShader& precompiled_shader);
91 std::size_t size_in_bytes);
92 85
93private: 86private:
94 explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, 87 explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
95 std::shared_ptr<VideoCommon::Shader::Registry> registry, 88 ProgramSharedPtr program);
96 ShaderEntries entries, ProgramSharedPtr program);
97 89
98 std::shared_ptr<VideoCommon::Shader::Registry> registry; 90 std::shared_ptr<VideoCommon::Shader::Registry> registry;
99 ShaderEntries entries; 91 ShaderEntries entries;
100 std::size_t size_in_bytes = 0;
101 ProgramSharedPtr program; 92 ProgramSharedPtr program;
102 GLuint handle = 0; 93 GLuint handle = 0;
103}; 94};
104 95
105class ShaderCacheOpenGL final : public RasterizerCache<Shader> { 96class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
106public: 97public:
107 explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, 98 explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
108 Core::Frontend::EmuWindow& emu_window, const Device& device); 99 Core::Frontend::EmuWindow& emu_window, const Device& device);
100 ~ShaderCacheOpenGL() override;
109 101
110 /// Loads disk cache for the current game 102 /// Loads disk cache for the current game
111 void LoadDiskCache(const std::atomic_bool& stop_loading, 103 void LoadDiskCache(const std::atomic_bool& stop_loading,
112 const VideoCore::DiskResourceLoadCallback& callback); 104 const VideoCore::DiskResourceLoadCallback& callback);
113 105
114 /// Gets the current specified shader stage program 106 /// Gets the current specified shader stage program
115 Shader GetStageProgram(Maxwell::ShaderProgram program); 107 Shader* GetStageProgram(Maxwell::ShaderProgram program);
116 108
117 /// Gets a compute kernel in the passed address 109 /// Gets a compute kernel in the passed address
118 Shader GetComputeKernel(GPUVAddr code_addr); 110 Shader* GetComputeKernel(GPUVAddr code_addr);
119
120protected:
121 // We do not have to flush this cache as things in it are never modified by us.
122 void FlushObjectInner(const Shader& object) override {}
123 111
124private: 112private:
125 ProgramSharedPtr GeneratePrecompiledProgram( 113 ProgramSharedPtr GeneratePrecompiledProgram(
@@ -132,10 +120,10 @@ private:
132 ShaderDiskCacheOpenGL disk_cache; 120 ShaderDiskCacheOpenGL disk_cache;
133 std::unordered_map<u64, PrecompiledShader> runtime_cache; 121 std::unordered_map<u64, PrecompiledShader> runtime_cache;
134 122
135 Shader null_shader{}; 123 std::unique_ptr<Shader> null_shader;
136 Shader null_kernel{}; 124 std::unique_ptr<Shader> null_kernel;
137 125
138 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 126 std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
139}; 127};
140 128
141} // namespace OpenGL 129} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 9cb115959..2c49aeaac 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -37,6 +37,7 @@ using Tegra::Shader::IpaMode;
37using Tegra::Shader::IpaSampleMode; 37using Tegra::Shader::IpaSampleMode;
38using Tegra::Shader::PixelImap; 38using Tegra::Shader::PixelImap;
39using Tegra::Shader::Register; 39using Tegra::Shader::Register;
40using Tegra::Shader::TextureType;
40using VideoCommon::Shader::BuildTransformFeedback; 41using VideoCommon::Shader::BuildTransformFeedback;
41using VideoCommon::Shader::Registry; 42using VideoCommon::Shader::Registry;
42 43
@@ -61,8 +62,8 @@ struct TextureDerivates {};
61using TextureArgument = std::pair<Type, Node>; 62using TextureArgument = std::pair<Type, Node>;
62using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>; 63using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
63 64
64constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 65constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
65 static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); 66constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
66 67
67constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt 68constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
68#define ftou floatBitsToUint 69#define ftou floatBitsToUint
@@ -402,6 +403,13 @@ std::string FlowStackTopName(MetaStackClass stack) {
402 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); 403 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
403} 404}
404 405
406bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) {
407 const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size());
408 // We waste one UBO for emulation
409 const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1;
410 return num_ubos > num_available_ubos;
411}
412
405struct GenericVaryingDescription { 413struct GenericVaryingDescription {
406 std::string name; 414 std::string name;
407 u8 first_element = 0; 415 u8 first_element = 0;
@@ -412,8 +420,9 @@ class GLSLDecompiler final {
412public: 420public:
413 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, 421 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
414 ShaderType stage, std::string_view identifier, std::string_view suffix) 422 ShaderType stage, std::string_view identifier, std::string_view suffix)
415 : device{device}, ir{ir}, registry{registry}, stage{stage}, 423 : device{device}, ir{ir}, registry{registry}, stage{stage}, identifier{identifier},
416 identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} { 424 suffix{suffix}, header{ir.GetHeader()}, use_unified_uniforms{
425 UseUnifiedUniforms(device, ir, stage)} {
417 if (stage != ShaderType::Compute) { 426 if (stage != ShaderType::Compute) {
418 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); 427 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
419 } 428 }
@@ -518,6 +527,9 @@ private:
518 if (device.HasImageLoadFormatted()) { 527 if (device.HasImageLoadFormatted()) {
519 code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); 528 code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
520 } 529 }
530 if (device.HasTextureShadowLod()) {
531 code.AddLine("#extension GL_EXT_texture_shadow_lod : require");
532 }
521 if (device.HasWarpIntrinsics()) { 533 if (device.HasWarpIntrinsics()) {
522 code.AddLine("#extension GL_NV_gpu_shader5 : require"); 534 code.AddLine("#extension GL_NV_gpu_shader5 : require");
523 code.AddLine("#extension GL_NV_shader_thread_group : require"); 535 code.AddLine("#extension GL_NV_shader_thread_group : require");
@@ -618,7 +630,9 @@ private:
618 break; 630 break;
619 } 631 }
620 } 632 }
621 if (stage != ShaderType::Vertex || device.HasVertexViewportLayer()) { 633
634 if (stage != ShaderType::Geometry &&
635 (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) {
622 if (ir.UsesLayer()) { 636 if (ir.UsesLayer()) {
623 code.AddLine("int gl_Layer;"); 637 code.AddLine("int gl_Layer;");
624 } 638 }
@@ -647,6 +661,16 @@ private:
647 --code.scope; 661 --code.scope;
648 code.AddLine("}};"); 662 code.AddLine("}};");
649 code.AddNewLine(); 663 code.AddNewLine();
664
665 if (stage == ShaderType::Geometry) {
666 if (ir.UsesLayer()) {
667 code.AddLine("out int gl_Layer;");
668 }
669 if (ir.UsesViewportIndex()) {
670 code.AddLine("out int gl_ViewportIndex;");
671 }
672 }
673 code.AddNewLine();
650 } 674 }
651 675
652 void DeclareRegisters() { 676 void DeclareRegisters() {
@@ -834,12 +858,24 @@ private:
834 } 858 }
835 859
836 void DeclareConstantBuffers() { 860 void DeclareConstantBuffers() {
861 if (use_unified_uniforms) {
862 const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer +
863 static_cast<u32>(ir.GetGlobalMemory().size());
864 code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{",
865 binding);
866 code.AddLine(" uint cbufs[];");
867 code.AddLine("}};");
868 code.AddNewLine();
869 return;
870 }
871
837 u32 binding = device.GetBaseBindings(stage).uniform_buffer; 872 u32 binding = device.GetBaseBindings(stage).uniform_buffer;
838 for (const auto& buffers : ir.GetConstantBuffers()) { 873 for (const auto [index, info] : ir.GetConstantBuffers()) {
839 const auto index = buffers.first; 874 const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4;
875 const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements;
840 code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, 876 code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
841 GetConstBufferBlock(index)); 877 GetConstBufferBlock(index));
842 code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); 878 code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size);
843 code.AddLine("}};"); 879 code.AddLine("}};");
844 code.AddNewLine(); 880 code.AddNewLine();
845 } 881 }
@@ -877,13 +913,13 @@ private:
877 return "samplerBuffer"; 913 return "samplerBuffer";
878 } 914 }
879 switch (sampler.type) { 915 switch (sampler.type) {
880 case Tegra::Shader::TextureType::Texture1D: 916 case TextureType::Texture1D:
881 return "sampler1D"; 917 return "sampler1D";
882 case Tegra::Shader::TextureType::Texture2D: 918 case TextureType::Texture2D:
883 return "sampler2D"; 919 return "sampler2D";
884 case Tegra::Shader::TextureType::Texture3D: 920 case TextureType::Texture3D:
885 return "sampler3D"; 921 return "sampler3D";
886 case Tegra::Shader::TextureType::TextureCube: 922 case TextureType::TextureCube:
887 return "samplerCube"; 923 return "samplerCube";
888 default: 924 default:
889 UNREACHABLE(); 925 UNREACHABLE();
@@ -1038,42 +1074,51 @@ private:
1038 1074
1039 if (const auto cbuf = std::get_if<CbufNode>(&*node)) { 1075 if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
1040 const Node offset = cbuf->GetOffset(); 1076 const Node offset = cbuf->GetOffset();
1077 const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS;
1078
1041 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { 1079 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
1042 // Direct access 1080 // Direct access
1043 const u32 offset_imm = immediate->GetValue(); 1081 const u32 offset_imm = immediate->GetValue();
1044 ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); 1082 ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
1045 return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), 1083 if (use_unified_uniforms) {
1046 offset_imm / (4 * 4), (offset_imm / 4) % 4), 1084 return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4),
1047 Type::Uint}; 1085 Type::Uint};
1086 } else {
1087 return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
1088 offset_imm / (4 * 4), (offset_imm / 4) % 4),
1089 Type::Uint};
1090 }
1048 } 1091 }
1049 1092
1050 if (std::holds_alternative<OperationNode>(*offset)) { 1093 // Indirect access
1051 // Indirect access 1094 if (use_unified_uniforms) {
1052 const std::string final_offset = code.GenerateTemporary(); 1095 return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset,
1053 code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); 1096 Visit(offset).AsUint()),
1097 Type::Uint};
1098 }
1054 1099
1055 if (!device.HasComponentIndexingBug()) { 1100 const std::string final_offset = code.GenerateTemporary();
1056 return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), 1101 code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
1057 final_offset, final_offset),
1058 Type::Uint};
1059 }
1060 1102
1061 // AMD's proprietary GLSL compiler emits ill code for variable component access. 1103 if (!device.HasComponentIndexingBug()) {
1062 // To bypass this driver bug generate 4 ifs, one per each component. 1104 return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
1063 const std::string pack = code.GenerateTemporary(); 1105 final_offset, final_offset),
1064 code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), 1106 Type::Uint};
1065 final_offset);
1066
1067 const std::string result = code.GenerateTemporary();
1068 code.AddLine("uint {};", result);
1069 for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
1070 code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result,
1071 pack, GetSwizzle(swizzle));
1072 }
1073 return {result, Type::Uint};
1074 } 1107 }
1075 1108
1076 UNREACHABLE_MSG("Unmanaged offset node type"); 1109 // AMD's proprietary GLSL compiler emits ill code for variable component access.
1110 // To bypass this driver bug generate 4 ifs, one per each component.
1111 const std::string pack = code.GenerateTemporary();
1112 code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
1113 final_offset);
1114
1115 const std::string result = code.GenerateTemporary();
1116 code.AddLine("uint {};", result);
1117 for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
1118 code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack,
1119 GetSwizzle(swizzle));
1120 }
1121 return {result, Type::Uint};
1077 } 1122 }
1078 1123
1079 if (const auto gmem = std::get_if<GmemNode>(&*node)) { 1124 if (const auto gmem = std::get_if<GmemNode>(&*node)) {
@@ -1339,8 +1384,19 @@ private:
1339 const std::size_t count = operation.GetOperandsCount(); 1384 const std::size_t count = operation.GetOperandsCount();
1340 const bool has_array = meta->sampler.is_array; 1385 const bool has_array = meta->sampler.is_array;
1341 const bool has_shadow = meta->sampler.is_shadow; 1386 const bool has_shadow = meta->sampler.is_shadow;
1387 const bool workaround_lod_array_shadow_as_grad =
1388 !device.HasTextureShadowLod() && function_suffix == "Lod" && meta->sampler.is_shadow &&
1389 ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
1390 meta->sampler.type == TextureType::TextureCube);
1391
1392 std::string expr = "texture";
1393
1394 if (workaround_lod_array_shadow_as_grad) {
1395 expr += "Grad";
1396 } else {
1397 expr += function_suffix;
1398 }
1342 1399
1343 std::string expr = "texture" + function_suffix;
1344 if (!meta->aoffi.empty()) { 1400 if (!meta->aoffi.empty()) {
1345 expr += "Offset"; 1401 expr += "Offset";
1346 } else if (!meta->ptp.empty()) { 1402 } else if (!meta->ptp.empty()) {
@@ -1374,6 +1430,16 @@ private:
1374 expr += ')'; 1430 expr += ')';
1375 } 1431 }
1376 1432
1433 if (workaround_lod_array_shadow_as_grad) {
1434 switch (meta->sampler.type) {
1435 case TextureType::Texture2D:
1436 return expr + ", vec2(0.0), vec2(0.0))";
1437 case TextureType::TextureCube:
1438 return expr + ", vec3(0.0), vec3(0.0))";
1439 }
1440 UNREACHABLE();
1441 }
1442
1377 for (const auto& variant : extras) { 1443 for (const auto& variant : extras) {
1378 if (const auto argument = std::get_if<TextureArgument>(&variant)) { 1444 if (const auto argument = std::get_if<TextureArgument>(&variant)) {
1379 expr += GenerateTextureArgument(*argument); 1445 expr += GenerateTextureArgument(*argument);
@@ -2000,8 +2066,19 @@ private:
2000 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 2066 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
2001 ASSERT(meta); 2067 ASSERT(meta);
2002 2068
2003 std::string expr = GenerateTexture( 2069 std::string expr{};
2004 operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); 2070
2071 if (!device.HasTextureShadowLod() && meta->sampler.is_shadow &&
2072 ((meta->sampler.type == TextureType::Texture2D && meta->sampler.is_array) ||
2073 meta->sampler.type == TextureType::TextureCube)) {
2074 LOG_ERROR(Render_OpenGL,
2075 "Device lacks GL_EXT_texture_shadow_lod, using textureGrad as a workaround");
2076 expr = GenerateTexture(operation, "Lod", {});
2077 } else {
2078 expr = GenerateTexture(operation, "Lod",
2079 {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
2080 }
2081
2005 if (meta->sampler.is_shadow) { 2082 if (meta->sampler.is_shadow) {
2006 expr = "vec4(" + expr + ')'; 2083 expr = "vec4(" + expr + ')';
2007 } 2084 }
@@ -2710,6 +2787,7 @@ private:
2710 const std::string_view identifier; 2787 const std::string_view identifier;
2711 const std::string_view suffix; 2788 const std::string_view suffix;
2712 const Header header; 2789 const Header header;
2790 const bool use_unified_uniforms;
2713 std::unordered_map<u8, VaryingTFB> transform_feedback; 2791 std::unordered_map<u8, VaryingTFB> transform_feedback;
2714 2792
2715 ShaderWriter code; 2793 ShaderWriter code;
@@ -2905,7 +2983,7 @@ void GLSLDecompiler::DecompileAST() {
2905 2983
2906} // Anonymous namespace 2984} // Anonymous namespace
2907 2985
2908ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { 2986ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) {
2909 ShaderEntries entries; 2987 ShaderEntries entries;
2910 for (const auto& cbuf : ir.GetConstantBuffers()) { 2988 for (const auto& cbuf : ir.GetConstantBuffers()) {
2911 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), 2989 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
@@ -2926,6 +3004,7 @@ ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
2926 entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; 3004 entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
2927 } 3005 }
2928 entries.shader_length = ir.GetLength(); 3006 entries.shader_length = ir.GetLength();
3007 entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage);
2929 return entries; 3008 return entries;
2930} 3009}
2931 3010
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index e8a178764..451c9689a 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -53,11 +53,13 @@ struct ShaderEntries {
53 std::vector<GlobalMemoryEntry> global_memory_entries; 53 std::vector<GlobalMemoryEntry> global_memory_entries;
54 std::vector<SamplerEntry> samplers; 54 std::vector<SamplerEntry> samplers;
55 std::vector<ImageEntry> images; 55 std::vector<ImageEntry> images;
56 u32 clip_distances{};
57 std::size_t shader_length{}; 56 std::size_t shader_length{};
57 u32 clip_distances{};
58 bool use_unified_uniforms{};
58}; 59};
59 60
60ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir); 61ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
62 Tegra::Engines::ShaderType stage);
61 63
62std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, 64std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
63 const VideoCommon::Shader::Registry& registry, 65 const VideoCommon::Shader::Registry& registry,
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 9e95a122b..653c3f2f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -29,6 +29,8 @@ using VideoCommon::Shader::KeyMap;
29 29
30namespace { 30namespace {
31 31
32using VideoCommon::Shader::SeparateSamplerKey;
33
32using ShaderCacheVersionHash = std::array<u8, 64>; 34using ShaderCacheVersionHash = std::array<u8, 64>;
33 35
34struct ConstBufferKey { 36struct ConstBufferKey {
@@ -37,18 +39,26 @@ struct ConstBufferKey {
37 u32 value = 0; 39 u32 value = 0;
38}; 40};
39 41
40struct BoundSamplerKey { 42struct BoundSamplerEntry {
41 u32 offset = 0; 43 u32 offset = 0;
42 Tegra::Engines::SamplerDescriptor sampler; 44 Tegra::Engines::SamplerDescriptor sampler;
43}; 45};
44 46
45struct BindlessSamplerKey { 47struct SeparateSamplerEntry {
48 u32 cbuf1 = 0;
49 u32 cbuf2 = 0;
50 u32 offset1 = 0;
51 u32 offset2 = 0;
52 Tegra::Engines::SamplerDescriptor sampler;
53};
54
55struct BindlessSamplerEntry {
46 u32 cbuf = 0; 56 u32 cbuf = 0;
47 u32 offset = 0; 57 u32 offset = 0;
48 Tegra::Engines::SamplerDescriptor sampler; 58 Tegra::Engines::SamplerDescriptor sampler;
49}; 59};
50 60
51constexpr u32 NativeVersion = 20; 61constexpr u32 NativeVersion = 21;
52 62
53ShaderCacheVersionHash GetShaderCacheVersionHash() { 63ShaderCacheVersionHash GetShaderCacheVersionHash() {
54 ShaderCacheVersionHash hash{}; 64 ShaderCacheVersionHash hash{};
@@ -87,12 +97,14 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
87 u32 texture_handler_size_value; 97 u32 texture_handler_size_value;
88 u32 num_keys; 98 u32 num_keys;
89 u32 num_bound_samplers; 99 u32 num_bound_samplers;
100 u32 num_separate_samplers;
90 u32 num_bindless_samplers; 101 u32 num_bindless_samplers;
91 if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || 102 if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
92 file.ReadArray(&is_texture_handler_size_known, 1) != 1 || 103 file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
93 file.ReadArray(&texture_handler_size_value, 1) != 1 || 104 file.ReadArray(&texture_handler_size_value, 1) != 1 ||
94 file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || 105 file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
95 file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || 106 file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
107 file.ReadArray(&num_separate_samplers, 1) != 1 ||
96 file.ReadArray(&num_bindless_samplers, 1) != 1) { 108 file.ReadArray(&num_bindless_samplers, 1) != 1) {
97 return false; 109 return false;
98 } 110 }
@@ -101,23 +113,32 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
101 } 113 }
102 114
103 std::vector<ConstBufferKey> flat_keys(num_keys); 115 std::vector<ConstBufferKey> flat_keys(num_keys);
104 std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers); 116 std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
105 std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers); 117 std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
118 std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
106 if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() || 119 if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
107 file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) != 120 file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
108 flat_bound_samplers.size() || 121 flat_bound_samplers.size() ||
122 file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) !=
123 flat_separate_samplers.size() ||
109 file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) != 124 file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
110 flat_bindless_samplers.size()) { 125 flat_bindless_samplers.size()) {
111 return false; 126 return false;
112 } 127 }
113 for (const auto& key : flat_keys) { 128 for (const auto& entry : flat_keys) {
114 keys.insert({{key.cbuf, key.offset}, key.value}); 129 keys.insert({{entry.cbuf, entry.offset}, entry.value});
115 } 130 }
116 for (const auto& key : flat_bound_samplers) { 131 for (const auto& entry : flat_bound_samplers) {
117 bound_samplers.emplace(key.offset, key.sampler); 132 bound_samplers.emplace(entry.offset, entry.sampler);
118 } 133 }
119 for (const auto& key : flat_bindless_samplers) { 134 for (const auto& entry : flat_separate_samplers) {
120 bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); 135 SeparateSamplerKey key;
136 key.buffers = {entry.cbuf1, entry.cbuf2};
137 key.offsets = {entry.offset1, entry.offset2};
138 separate_samplers.emplace(key, entry.sampler);
139 }
140 for (const auto& entry : flat_bindless_samplers) {
141 bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
121 } 142 }
122 143
123 return true; 144 return true;
@@ -142,6 +163,7 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
142 file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || 163 file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
143 file.WriteObject(static_cast<u32>(keys.size())) != 1 || 164 file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
144 file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || 165 file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
166 file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 ||
145 file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { 167 file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
146 return false; 168 return false;
147 } 169 }
@@ -152,22 +174,34 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
152 flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); 174 flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
153 } 175 }
154 176
155 std::vector<BoundSamplerKey> flat_bound_samplers; 177 std::vector<BoundSamplerEntry> flat_bound_samplers;
156 flat_bound_samplers.reserve(bound_samplers.size()); 178 flat_bound_samplers.reserve(bound_samplers.size());
157 for (const auto& [address, sampler] : bound_samplers) { 179 for (const auto& [address, sampler] : bound_samplers) {
158 flat_bound_samplers.push_back(BoundSamplerKey{address, sampler}); 180 flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
181 }
182
183 std::vector<SeparateSamplerEntry> flat_separate_samplers;
184 flat_separate_samplers.reserve(separate_samplers.size());
185 for (const auto& [key, sampler] : separate_samplers) {
186 SeparateSamplerEntry entry;
187 std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
188 std::tie(entry.offset1, entry.offset2) = key.offsets;
189 entry.sampler = sampler;
190 flat_separate_samplers.push_back(entry);
159 } 191 }
160 192
161 std::vector<BindlessSamplerKey> flat_bindless_samplers; 193 std::vector<BindlessSamplerEntry> flat_bindless_samplers;
162 flat_bindless_samplers.reserve(bindless_samplers.size()); 194 flat_bindless_samplers.reserve(bindless_samplers.size());
163 for (const auto& [address, sampler] : bindless_samplers) { 195 for (const auto& [address, sampler] : bindless_samplers) {
164 flat_bindless_samplers.push_back( 196 flat_bindless_samplers.push_back(
165 BindlessSamplerKey{address.first, address.second, sampler}); 197 BindlessSamplerEntry{address.first, address.second, sampler});
166 } 198 }
167 199
168 return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() && 200 return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
169 file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) == 201 file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
170 flat_bound_samplers.size() && 202 flat_bound_samplers.size() &&
203 file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) ==
204 flat_separate_samplers.size() &&
171 file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) == 205 file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
172 flat_bindless_samplers.size(); 206 flat_bindless_samplers.size();
173} 207}
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index d5be52e40..a79cef0e9 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -57,6 +57,7 @@ struct ShaderDiskCacheEntry {
57 VideoCommon::Shader::ComputeInfo compute_info; 57 VideoCommon::Shader::ComputeInfo compute_info;
58 VideoCommon::Shader::KeyMap keys; 58 VideoCommon::Shader::KeyMap keys;
59 VideoCommon::Shader::BoundSamplerMap bound_samplers; 59 VideoCommon::Shader::BoundSamplerMap bound_samplers;
60 VideoCommon::Shader::SeparateSamplerMap separate_samplers;
60 VideoCommon::Shader::BindlessSamplerMap bindless_samplers; 61 VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
61}; 62};
62 63
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 6ec328c53..3655ff629 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -2,11 +2,13 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <deque> 5#include <tuple>
6#include <vector> 6#include <vector>
7
7#include "common/alignment.h" 8#include "common/alignment.h"
8#include "common/assert.h" 9#include "common/assert.h"
9#include "common/microprofile.h" 10#include "common/microprofile.h"
11#include "video_core/renderer_opengl/gl_device.h"
10#include "video_core/renderer_opengl/gl_stream_buffer.h" 12#include "video_core/renderer_opengl/gl_stream_buffer.h"
11 13
12MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", 14MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
@@ -14,8 +16,7 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
14 16
15namespace OpenGL { 17namespace OpenGL {
16 18
17OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent, 19OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage)
18 bool use_persistent)
19 : buffer_size(size) { 20 : buffer_size(size) {
20 gl_buffer.Create(); 21 gl_buffer.Create();
21 22
@@ -29,34 +30,22 @@ OGLStreamBuffer::OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool p
29 allocate_size *= 2; 30 allocate_size *= 2;
30 } 31 }
31 32
32 if (use_persistent) { 33 static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
33 persistent = true; 34 glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
34 coherent = prefer_coherent; 35 mapped_ptr = static_cast<u8*>(
35 const GLbitfield flags = 36 glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
36 GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); 37
37 glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags); 38 if (device.HasVertexBufferUnifiedMemory()) {
38 mapped_ptr = static_cast<u8*>(glMapNamedBufferRange( 39 glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
39 gl_buffer.handle, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); 40 glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address);
40 } else {
41 glNamedBufferData(gl_buffer.handle, allocate_size, nullptr, GL_STREAM_DRAW);
42 } 41 }
43} 42}
44 43
45OGLStreamBuffer::~OGLStreamBuffer() { 44OGLStreamBuffer::~OGLStreamBuffer() {
46 if (persistent) { 45 glUnmapNamedBuffer(gl_buffer.handle);
47 glUnmapNamedBuffer(gl_buffer.handle);
48 }
49 gl_buffer.Release(); 46 gl_buffer.Release();
50} 47}
51 48
52GLuint OGLStreamBuffer::GetHandle() const {
53 return gl_buffer.handle;
54}
55
56GLsizeiptr OGLStreamBuffer::GetSize() const {
57 return buffer_size;
58}
59
60std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { 49std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
61 ASSERT(size <= buffer_size); 50 ASSERT(size <= buffer_size);
62 ASSERT(alignment <= buffer_size); 51 ASSERT(alignment <= buffer_size);
@@ -68,36 +57,21 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
68 57
69 bool invalidate = false; 58 bool invalidate = false;
70 if (buffer_pos + size > buffer_size) { 59 if (buffer_pos + size > buffer_size) {
60 MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
61 glInvalidateBufferData(gl_buffer.handle);
62
71 buffer_pos = 0; 63 buffer_pos = 0;
72 invalidate = true; 64 invalidate = true;
73
74 if (persistent) {
75 glUnmapNamedBuffer(gl_buffer.handle);
76 }
77 } 65 }
78 66
79 if (invalidate || !persistent) { 67 return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate);
80 MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
81 GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
82 (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
83 (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
84 mapped_ptr = static_cast<u8*>(
85 glMapNamedBufferRange(gl_buffer.handle, buffer_pos, buffer_size - buffer_pos, flags));
86 mapped_offset = buffer_pos;
87 }
88
89 return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
90} 68}
91 69
92void OGLStreamBuffer::Unmap(GLsizeiptr size) { 70void OGLStreamBuffer::Unmap(GLsizeiptr size) {
93 ASSERT(size <= mapped_size); 71 ASSERT(size <= mapped_size);
94 72
95 if (!coherent && size > 0) { 73 if (size > 0) {
96 glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos - mapped_offset, size); 74 glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size);
97 }
98
99 if (!persistent) {
100 glUnmapNamedBuffer(gl_buffer.handle);
101 } 75 }
102 76
103 buffer_pos += size; 77 buffer_pos += size;
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index f8383cbd4..307a67113 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -11,15 +11,13 @@
11 11
12namespace OpenGL { 12namespace OpenGL {
13 13
14class Device;
15
14class OGLStreamBuffer : private NonCopyable { 16class OGLStreamBuffer : private NonCopyable {
15public: 17public:
16 explicit OGLStreamBuffer(GLsizeiptr size, bool vertex_data_usage, bool prefer_coherent = false, 18 explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage);
17 bool use_persistent = true);
18 ~OGLStreamBuffer(); 19 ~OGLStreamBuffer();
19 20
20 GLuint GetHandle() const;
21 GLsizeiptr GetSize() const;
22
23 /* 21 /*
24 * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes 22 * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
25 * and the optional alignment requirement. 23 * and the optional alignment requirement.
@@ -32,15 +30,24 @@ public:
32 30
33 void Unmap(GLsizeiptr size); 31 void Unmap(GLsizeiptr size);
34 32
33 GLuint Handle() const {
34 return gl_buffer.handle;
35 }
36
37 u64 Address() const {
38 return gpu_address;
39 }
40
41 GLsizeiptr Size() const noexcept {
42 return buffer_size;
43 }
44
35private: 45private:
36 OGLBuffer gl_buffer; 46 OGLBuffer gl_buffer;
37 47
38 bool coherent = false; 48 GLuint64EXT gpu_address = 0;
39 bool persistent = false;
40
41 GLintptr buffer_pos = 0; 49 GLintptr buffer_pos = 0;
42 GLsizeiptr buffer_size = 0; 50 GLsizeiptr buffer_size = 0;
43 GLintptr mapped_offset = 0;
44 GLsizeiptr mapped_size = 0; 51 GLsizeiptr mapped_size = 0;
45 u8* mapped_ptr = nullptr; 52 u8* mapped_ptr = nullptr;
46}; 53};
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 4faa8b90c..61505879b 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -263,9 +263,14 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param
263 target = GetTextureTarget(params.target); 263 target = GetTextureTarget(params.target);
264 texture = CreateTexture(params, target, internal_format, texture_buffer); 264 texture = CreateTexture(params, target, internal_format, texture_buffer);
265 DecorateSurfaceName(); 265 DecorateSurfaceName();
266 main_view = CreateViewInner( 266
267 ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels), 267 u32 num_layers = 1;
268 true); 268 if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
269 num_layers = params.depth;
270 }
271
272 main_view =
273 CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true);
269} 274}
270 275
271CachedSurface::~CachedSurface() = default; 276CachedSurface::~CachedSurface() = default;
@@ -404,8 +409,7 @@ View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_pr
404 409
405CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params, 410CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params,
406 bool is_proxy) 411 bool is_proxy)
407 : VideoCommon::ViewBase(params), surface{surface}, 412 : VideoCommon::ViewBase(params), surface{surface}, format{surface.internal_format},
408 format{GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format},
409 target{GetTextureTarget(params.target)}, is_proxy{is_proxy} { 413 target{GetTextureTarget(params.target)}, is_proxy{is_proxy} {
410 if (!is_proxy) { 414 if (!is_proxy) {
411 main_view = CreateTextureView(); 415 main_view = CreateTextureView();
@@ -414,20 +418,23 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p
414 418
415CachedSurfaceView::~CachedSurfaceView() = default; 419CachedSurfaceView::~CachedSurfaceView() = default;
416 420
417void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { 421void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const {
418 ASSERT(params.num_levels == 1); 422 ASSERT(params.num_levels == 1);
419 423
424 if (params.target == SurfaceTarget::Texture3D) {
425 if (params.num_layers > 1) {
426 ASSERT(params.base_layer == 0);
427 glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level);
428 } else {
429 glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle,
430 params.base_level, params.base_layer);
431 }
432 return;
433 }
434
420 if (params.num_layers > 1) { 435 if (params.num_layers > 1) {
421 // Layered framebuffer attachments
422 UNIMPLEMENTED_IF(params.base_layer != 0); 436 UNIMPLEMENTED_IF(params.base_layer != 0);
423 437 glFramebufferTexture(fb_target, attachment, GetTexture(), 0);
424 switch (params.target) {
425 case SurfaceTarget::Texture2DArray:
426 glFramebufferTexture(target, attachment, GetTexture(), 0);
427 break;
428 default:
429 UNIMPLEMENTED();
430 }
431 return; 438 return;
432 } 439 }
433 440
@@ -435,16 +442,16 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
435 const GLuint texture = surface.GetTexture(); 442 const GLuint texture = surface.GetTexture();
436 switch (surface.GetSurfaceParams().target) { 443 switch (surface.GetSurfaceParams().target) {
437 case SurfaceTarget::Texture1D: 444 case SurfaceTarget::Texture1D:
438 glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level); 445 glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level);
439 break; 446 break;
440 case SurfaceTarget::Texture2D: 447 case SurfaceTarget::Texture2D:
441 glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level); 448 glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level);
442 break; 449 break;
443 case SurfaceTarget::Texture1DArray: 450 case SurfaceTarget::Texture1DArray:
444 case SurfaceTarget::Texture2DArray: 451 case SurfaceTarget::Texture2DArray:
445 case SurfaceTarget::TextureCubemap: 452 case SurfaceTarget::TextureCubemap:
446 case SurfaceTarget::TextureCubeArray: 453 case SurfaceTarget::TextureCubeArray:
447 glFramebufferTextureLayer(target, attachment, texture, params.base_level, 454 glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level,
448 params.base_layer); 455 params.base_layer);
449 break; 456 break;
450 default: 457 default:
@@ -501,8 +508,13 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
501 OGLTextureView texture_view; 508 OGLTextureView texture_view;
502 texture_view.Create(); 509 texture_view.Create();
503 510
504 glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level, 511 if (target == GL_TEXTURE_3D) {
505 params.num_levels, params.base_layer, params.num_layers); 512 glTextureView(texture_view.handle, target, surface.texture.handle, format,
513 params.base_level, params.num_levels, 0, 1);
514 } else {
515 glTextureView(texture_view.handle, target, surface.texture.handle, format,
516 params.base_level, params.num_levels, params.base_layer, params.num_layers);
517 }
506 ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle); 518 ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
507 519
508 return texture_view; 520 return texture_view;
@@ -545,8 +557,8 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
545 const Tegra::Engines::Fermi2D::Config& copy_config) { 557 const Tegra::Engines::Fermi2D::Config& copy_config) {
546 const auto& src_params{src_view->GetSurfaceParams()}; 558 const auto& src_params{src_view->GetSurfaceParams()};
547 const auto& dst_params{dst_view->GetSurfaceParams()}; 559 const auto& dst_params{dst_view->GetSurfaceParams()};
548 UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D); 560 UNIMPLEMENTED_IF(src_params.depth != 1);
549 UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D); 561 UNIMPLEMENTED_IF(dst_params.depth != 1);
550 562
551 state_tracker.NotifyScissor0(); 563 state_tracker.NotifyScissor0();
552 state_tracker.NotifyFramebuffer(); 564 state_tracker.NotifyFramebuffer();
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 8a2ac8603..bfc4ddf5d 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -80,8 +80,10 @@ public:
80 explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy); 80 explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy);
81 ~CachedSurfaceView(); 81 ~CachedSurfaceView();
82 82
83 /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER 83 /// @brief Attaches this texture view to the currently bound fb_target framebuffer
84 void Attach(GLenum attachment, GLenum target) const; 84 /// @param attachment Attachment to bind textures to
85 /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER)
86 void Attach(GLenum attachment, GLenum fb_target) const;
85 87
86 GLuint GetTexture(Tegra::Texture::SwizzleSource x_source, 88 GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
87 Tegra::Texture::SwizzleSource y_source, 89 Tegra::Texture::SwizzleSource y_source,
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 994ae98eb..35e329240 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -46,10 +46,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
46 return GL_UNSIGNED_INT; 46 return GL_UNSIGNED_INT;
47 case Maxwell::VertexAttribute::Size::Size_10_10_10_2: 47 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
48 return GL_UNSIGNED_INT_2_10_10_10_REV; 48 return GL_UNSIGNED_INT_2_10_10_10_REV;
49 default:
50 LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
51 return {};
52 } 49 }
50 break;
53 case Maxwell::VertexAttribute::Type::SignedInt: 51 case Maxwell::VertexAttribute::Type::SignedInt:
54 case Maxwell::VertexAttribute::Type::SignedNorm: 52 case Maxwell::VertexAttribute::Type::SignedNorm:
55 switch (attrib.size) { 53 switch (attrib.size) {
@@ -70,10 +68,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
70 return GL_INT; 68 return GL_INT;
71 case Maxwell::VertexAttribute::Size::Size_10_10_10_2: 69 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
72 return GL_INT_2_10_10_10_REV; 70 return GL_INT_2_10_10_10_REV;
73 default:
74 LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
75 return {};
76 } 71 }
72 break;
77 case Maxwell::VertexAttribute::Type::Float: 73 case Maxwell::VertexAttribute::Type::Float:
78 switch (attrib.size) { 74 switch (attrib.size) {
79 case Maxwell::VertexAttribute::Size::Size_16: 75 case Maxwell::VertexAttribute::Size::Size_16:
@@ -86,10 +82,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
86 case Maxwell::VertexAttribute::Size::Size_32_32_32: 82 case Maxwell::VertexAttribute::Size::Size_32_32_32:
87 case Maxwell::VertexAttribute::Size::Size_32_32_32_32: 83 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
88 return GL_FLOAT; 84 return GL_FLOAT;
89 default:
90 LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
91 return {};
92 } 85 }
86 break;
93 case Maxwell::VertexAttribute::Type::UnsignedScaled: 87 case Maxwell::VertexAttribute::Type::UnsignedScaled:
94 switch (attrib.size) { 88 switch (attrib.size) {
95 case Maxwell::VertexAttribute::Size::Size_8: 89 case Maxwell::VertexAttribute::Size::Size_8:
@@ -102,10 +96,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
102 case Maxwell::VertexAttribute::Size::Size_16_16_16: 96 case Maxwell::VertexAttribute::Size::Size_16_16_16:
103 case Maxwell::VertexAttribute::Size::Size_16_16_16_16: 97 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
104 return GL_UNSIGNED_SHORT; 98 return GL_UNSIGNED_SHORT;
105 default:
106 LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
107 return {};
108 } 99 }
100 break;
109 case Maxwell::VertexAttribute::Type::SignedScaled: 101 case Maxwell::VertexAttribute::Type::SignedScaled:
110 switch (attrib.size) { 102 switch (attrib.size) {
111 case Maxwell::VertexAttribute::Size::Size_8: 103 case Maxwell::VertexAttribute::Size::Size_8:
@@ -118,14 +110,12 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
118 case Maxwell::VertexAttribute::Size::Size_16_16_16: 110 case Maxwell::VertexAttribute::Size::Size_16_16_16:
119 case Maxwell::VertexAttribute::Size::Size_16_16_16_16: 111 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
120 return GL_SHORT; 112 return GL_SHORT;
121 default:
122 LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
123 return {};
124 } 113 }
125 default: 114 break;
126 LOG_ERROR(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
127 return {};
128 } 115 }
116 UNIMPLEMENTED_MSG("Unimplemented vertex type={} and size={}", attrib.TypeString(),
117 attrib.SizeString());
118 return {};
129} 119}
130 120
131inline GLenum IndexFormat(Maxwell::IndexFormat index_format) { 121inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
@@ -137,8 +127,7 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
137 case Maxwell::IndexFormat::UnsignedInt: 127 case Maxwell::IndexFormat::UnsignedInt:
138 return GL_UNSIGNED_INT; 128 return GL_UNSIGNED_INT;
139 } 129 }
140 LOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format)); 130 UNREACHABLE_MSG("Invalid index_format={}", static_cast<u32>(index_format));
141 UNREACHABLE();
142 return {}; 131 return {};
143} 132}
144 133
@@ -180,33 +169,32 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
180} 169}
181 170
182inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, 171inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
183 Tegra::Texture::TextureMipmapFilter mip_filter_mode) { 172 Tegra::Texture::TextureMipmapFilter mipmap_filter_mode) {
184 switch (filter_mode) { 173 switch (filter_mode) {
185 case Tegra::Texture::TextureFilter::Linear: { 174 case Tegra::Texture::TextureFilter::Nearest:
186 switch (mip_filter_mode) { 175 switch (mipmap_filter_mode) {
187 case Tegra::Texture::TextureMipmapFilter::None: 176 case Tegra::Texture::TextureMipmapFilter::None:
188 return GL_LINEAR; 177 return GL_NEAREST;
189 case Tegra::Texture::TextureMipmapFilter::Nearest: 178 case Tegra::Texture::TextureMipmapFilter::Nearest:
190 return GL_LINEAR_MIPMAP_NEAREST; 179 return GL_NEAREST_MIPMAP_NEAREST;
191 case Tegra::Texture::TextureMipmapFilter::Linear: 180 case Tegra::Texture::TextureMipmapFilter::Linear:
192 return GL_LINEAR_MIPMAP_LINEAR; 181 return GL_NEAREST_MIPMAP_LINEAR;
193 } 182 }
194 break; 183 break;
195 } 184 case Tegra::Texture::TextureFilter::Linear:
196 case Tegra::Texture::TextureFilter::Nearest: { 185 switch (mipmap_filter_mode) {
197 switch (mip_filter_mode) {
198 case Tegra::Texture::TextureMipmapFilter::None: 186 case Tegra::Texture::TextureMipmapFilter::None:
199 return GL_NEAREST; 187 return GL_LINEAR;
200 case Tegra::Texture::TextureMipmapFilter::Nearest: 188 case Tegra::Texture::TextureMipmapFilter::Nearest:
201 return GL_NEAREST_MIPMAP_NEAREST; 189 return GL_LINEAR_MIPMAP_NEAREST;
202 case Tegra::Texture::TextureMipmapFilter::Linear: 190 case Tegra::Texture::TextureMipmapFilter::Linear:
203 return GL_NEAREST_MIPMAP_LINEAR; 191 return GL_LINEAR_MIPMAP_LINEAR;
204 } 192 }
205 break; 193 break;
206 } 194 }
207 } 195 UNREACHABLE_MSG("Invalid texture filter mode={} and mipmap filter mode={}",
208 LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode)); 196 static_cast<u32>(filter_mode), static_cast<u32>(mipmap_filter_mode));
209 return GL_LINEAR; 197 return GL_NEAREST;
210} 198}
211 199
212inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { 200inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
@@ -229,10 +217,9 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
229 } else { 217 } else {
230 return GL_MIRROR_CLAMP_TO_EDGE; 218 return GL_MIRROR_CLAMP_TO_EDGE;
231 } 219 }
232 default:
233 LOG_ERROR(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
234 return GL_REPEAT;
235 } 220 }
221 UNIMPLEMENTED_MSG("Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode));
222 return GL_REPEAT;
236} 223}
237 224
238inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) { 225inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
@@ -254,8 +241,7 @@ inline GLenum DepthCompareFunc(Tegra::Texture::DepthCompareFunc func) {
254 case Tegra::Texture::DepthCompareFunc::Always: 241 case Tegra::Texture::DepthCompareFunc::Always:
255 return GL_ALWAYS; 242 return GL_ALWAYS;
256 } 243 }
257 LOG_ERROR(Render_OpenGL, "Unimplemented texture depth compare function ={}", 244 UNIMPLEMENTED_MSG("Unimplemented texture depth compare function={}", static_cast<u32>(func));
258 static_cast<u32>(func));
259 return GL_GREATER; 245 return GL_GREATER;
260} 246}
261 247
@@ -277,7 +263,7 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
277 case Maxwell::Blend::Equation::MaxGL: 263 case Maxwell::Blend::Equation::MaxGL:
278 return GL_MAX; 264 return GL_MAX;
279 } 265 }
280 LOG_ERROR(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation)); 266 UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
281 return GL_FUNC_ADD; 267 return GL_FUNC_ADD;
282} 268}
283 269
@@ -341,7 +327,7 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
341 case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: 327 case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
342 return GL_ONE_MINUS_CONSTANT_ALPHA; 328 return GL_ONE_MINUS_CONSTANT_ALPHA;
343 } 329 }
344 LOG_ERROR(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor)); 330 UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
345 return GL_ZERO; 331 return GL_ZERO;
346} 332}
347 333
@@ -361,7 +347,7 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
361 case Tegra::Texture::SwizzleSource::OneFloat: 347 case Tegra::Texture::SwizzleSource::OneFloat:
362 return GL_ONE; 348 return GL_ONE;
363 } 349 }
364 LOG_ERROR(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source)); 350 UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(source));
365 return GL_ZERO; 351 return GL_ZERO;
366} 352}
367 353
@@ -392,7 +378,7 @@ inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) {
392 case Maxwell::ComparisonOp::AlwaysOld: 378 case Maxwell::ComparisonOp::AlwaysOld:
393 return GL_ALWAYS; 379 return GL_ALWAYS;
394 } 380 }
395 LOG_ERROR(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison)); 381 UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
396 return GL_ALWAYS; 382 return GL_ALWAYS;
397} 383}
398 384
@@ -423,7 +409,7 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) {
423 case Maxwell::StencilOp::DecrWrapOGL: 409 case Maxwell::StencilOp::DecrWrapOGL:
424 return GL_DECR_WRAP; 410 return GL_DECR_WRAP;
425 } 411 }
426 LOG_ERROR(Render_OpenGL, "Unimplemented stencil op={}", static_cast<u32>(stencil)); 412 UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil));
427 return GL_KEEP; 413 return GL_KEEP;
428} 414}
429 415
@@ -434,7 +420,7 @@ inline GLenum FrontFace(Maxwell::FrontFace front_face) {
434 case Maxwell::FrontFace::CounterClockWise: 420 case Maxwell::FrontFace::CounterClockWise:
435 return GL_CCW; 421 return GL_CCW;
436 } 422 }
437 LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face)); 423 UNIMPLEMENTED_MSG("Unimplemented front face cull={}", static_cast<u32>(front_face));
438 return GL_CCW; 424 return GL_CCW;
439} 425}
440 426
@@ -447,7 +433,7 @@ inline GLenum CullFace(Maxwell::CullFace cull_face) {
447 case Maxwell::CullFace::FrontAndBack: 433 case Maxwell::CullFace::FrontAndBack:
448 return GL_FRONT_AND_BACK; 434 return GL_FRONT_AND_BACK;
449 } 435 }
450 LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face)); 436 UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
451 return GL_BACK; 437 return GL_BACK;
452} 438}
453 439
@@ -486,7 +472,7 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) {
486 case Maxwell::LogicOperation::Set: 472 case Maxwell::LogicOperation::Set:
487 return GL_SET; 473 return GL_SET;
488 } 474 }
489 LOG_ERROR(Render_OpenGL, "Unimplemented logic operation={}", static_cast<u32>(operation)); 475 UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(operation));
490 return GL_COPY; 476 return GL_COPY;
491} 477}
492 478
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 6b489e6db..c40adb6e7 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -488,6 +488,15 @@ void RendererOpenGL::InitOpenGLObjects() {
488 488
489 // Clear screen to black 489 // Clear screen to black
490 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); 490 LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
491
492 // Enable unified vertex attributes and query vertex buffer address when the driver supports it
493 if (device.HasVertexBufferUnifiedMemory()) {
494 glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
495
496 glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY);
497 glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV,
498 &vertex_buffer_address);
499 }
491} 500}
492 501
493void RendererOpenGL::AddTelemetryFields() { 502void RendererOpenGL::AddTelemetryFields() {
@@ -656,7 +665,13 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
656 offsetof(ScreenRectVertex, tex_coord)); 665 offsetof(ScreenRectVertex, tex_coord));
657 glVertexAttribBinding(PositionLocation, 0); 666 glVertexAttribBinding(PositionLocation, 0);
658 glVertexAttribBinding(TexCoordLocation, 0); 667 glVertexAttribBinding(TexCoordLocation, 0);
659 glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); 668 if (device.HasVertexBufferUnifiedMemory()) {
669 glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex));
670 glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address,
671 sizeof(vertices));
672 } else {
673 glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
674 }
660 675
661 glBindTextureUnit(0, screen_info.display_texture); 676 glBindTextureUnit(0, screen_info.display_texture);
662 glBindSampler(0, 0); 677 glBindSampler(0, 0);
@@ -751,8 +766,9 @@ void RendererOpenGL::RenderScreenshot() {
751} 766}
752 767
753bool RendererOpenGL::Init() { 768bool RendererOpenGL::Init() {
754 if (GLAD_GL_KHR_debug) { 769 if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
755 glEnable(GL_DEBUG_OUTPUT); 770 glEnable(GL_DEBUG_OUTPUT);
771 glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
756 glDebugMessageCallback(DebugHandler, nullptr); 772 glDebugMessageCallback(DebugHandler, nullptr);
757 } 773 }
758 774
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 61bf507f4..8b18d32e6 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -107,6 +107,9 @@ private:
107 OGLPipeline pipeline; 107 OGLPipeline pipeline;
108 OGLFramebuffer screenshot_framebuffer; 108 OGLFramebuffer screenshot_framebuffer;
109 109
110 // GPU address of the vertex buffer
111 GLuint64EXT vertex_buffer_address = 0;
112
110 /// Display information for Switch screen 113 /// Display information for Switch screen
111 ScreenInfo screen_info; 114 ScreenInfo screen_info;
112 115
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 568744e3c..424278816 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -71,8 +71,7 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept {
71 const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); 71 const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
72 72
73 u32 packed_front_face = PackFrontFace(regs.front_face); 73 u32 packed_front_face = PackFrontFace(regs.front_face);
74 if (regs.screen_y_control.triangle_rast_flip != 0 && 74 if (regs.screen_y_control.triangle_rast_flip != 0) {
75 regs.viewport_transform[0].scale_y > 0.0f) {
76 // Flip front face 75 // Flip front face
77 packed_front_face = 1 - packed_front_face; 76 packed_front_face = 1 - packed_front_face;
78 } 77 }
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 2871035f5..1f2b6734b 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -21,29 +21,29 @@ namespace Sampler {
21 21
22VkFilter Filter(Tegra::Texture::TextureFilter filter) { 22VkFilter Filter(Tegra::Texture::TextureFilter filter) {
23 switch (filter) { 23 switch (filter) {
24 case Tegra::Texture::TextureFilter::Linear:
25 return VK_FILTER_LINEAR;
26 case Tegra::Texture::TextureFilter::Nearest: 24 case Tegra::Texture::TextureFilter::Nearest:
27 return VK_FILTER_NEAREST; 25 return VK_FILTER_NEAREST;
26 case Tegra::Texture::TextureFilter::Linear:
27 return VK_FILTER_LINEAR;
28 } 28 }
29 UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter)); 29 UNREACHABLE_MSG("Invalid sampler filter={}", static_cast<u32>(filter));
30 return {}; 30 return {};
31} 31}
32 32
33VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) { 33VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
34 switch (mipmap_filter) { 34 switch (mipmap_filter) {
35 case Tegra::Texture::TextureMipmapFilter::None: 35 case Tegra::Texture::TextureMipmapFilter::None:
36 // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping 36 // There are no Vulkan filter modes that directly correspond to OpenGL minification filters
37 // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to 37 // of GL_LINEAR or GL_NEAREST, but they can be emulated using
38 // use an image view with a single mipmap level to emulate this. 38 // VK_SAMPLER_MIPMAP_MODE_NEAREST, minLod = 0, and maxLod = 0.25, and using minFilter =
39 return VK_SAMPLER_MIPMAP_MODE_LINEAR; 39 // VK_FILTER_LINEAR or minFilter = VK_FILTER_NEAREST, respectively.
40 ; 40 return VK_SAMPLER_MIPMAP_MODE_NEAREST;
41 case Tegra::Texture::TextureMipmapFilter::Linear:
42 return VK_SAMPLER_MIPMAP_MODE_LINEAR;
43 case Tegra::Texture::TextureMipmapFilter::Nearest: 41 case Tegra::Texture::TextureMipmapFilter::Nearest:
44 return VK_SAMPLER_MIPMAP_MODE_NEAREST; 42 return VK_SAMPLER_MIPMAP_MODE_NEAREST;
43 case Tegra::Texture::TextureMipmapFilter::Linear:
44 return VK_SAMPLER_MIPMAP_MODE_LINEAR;
45 } 45 }
46 UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter)); 46 UNREACHABLE_MSG("Invalid sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
47 return {}; 47 return {};
48} 48}
49 49
@@ -78,10 +78,9 @@ VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode w
78 case Tegra::Texture::WrapMode::MirrorOnceBorder: 78 case Tegra::Texture::WrapMode::MirrorOnceBorder:
79 UNIMPLEMENTED(); 79 UNIMPLEMENTED();
80 return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; 80 return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
81 default:
82 UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
83 return {};
84 } 81 }
82 UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
83 return {};
85} 84}
86 85
87VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) { 86VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
@@ -149,7 +148,7 @@ struct FormatTuple {
149 {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F 148 {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F
150 {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U 149 {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U
151 {VK_FORMAT_UNDEFINED}, // R16S 150 {VK_FORMAT_UNDEFINED}, // R16S
152 {VK_FORMAT_UNDEFINED}, // R16UI 151 {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16UI
153 {VK_FORMAT_UNDEFINED}, // R16I 152 {VK_FORMAT_UNDEFINED}, // R16I
154 {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16 153 {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16
155 {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F 154 {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F
@@ -288,10 +287,9 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
288 return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; 287 return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
289 case Maxwell::PrimitiveTopology::Patches: 288 case Maxwell::PrimitiveTopology::Patches:
290 return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; 289 return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
291 default:
292 UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
293 return {};
294 } 290 }
291 UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
292 return {};
295} 293}
296 294
297VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { 295VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 59b441943..cd9673d1f 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -13,6 +13,7 @@
13#include <fmt/format.h> 13#include <fmt/format.h>
14 14
15#include "common/dynamic_library.h" 15#include "common/dynamic_library.h"
16#include "common/file_util.h"
16#include "common/logging/log.h" 17#include "common/logging/log.h"
17#include "common/telemetry.h" 18#include "common/telemetry.h"
18#include "core/core.h" 19#include "core/core.h"
@@ -76,7 +77,8 @@ Common::DynamicLibrary OpenVulkanLibrary() {
76 char* libvulkan_env = getenv("LIBVULKAN_PATH"); 77 char* libvulkan_env = getenv("LIBVULKAN_PATH");
77 if (!libvulkan_env || !library.Open(libvulkan_env)) { 78 if (!libvulkan_env || !library.Open(libvulkan_env)) {
78 // Use the libvulkan.dylib from the application bundle. 79 // Use the libvulkan.dylib from the application bundle.
79 std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; 80 const std::string filename =
81 FileUtil::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
80 library.Open(filename.c_str()); 82 library.Open(filename.c_str());
81 } 83 }
82#else 84#else
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 5f33d9e40..f10f96cd8 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -37,9 +37,9 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch
37 37
38} // Anonymous namespace 38} // Anonymous namespace
39 39
40CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, 40Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
41 VAddr cpu_addr, std::size_t size) 41 VKStagingBufferPool& staging_pool_, VAddr cpu_addr, std::size_t size)
42 : VideoCommon::BufferBlock{cpu_addr, size} { 42 : VideoCommon::BufferBlock{cpu_addr, size}, scheduler{scheduler_}, staging_pool{staging_pool_} {
43 VkBufferCreateInfo ci; 43 VkBufferCreateInfo ci;
44 ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; 44 ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
45 ci.pNext = nullptr; 45 ci.pNext = nullptr;
@@ -54,46 +54,17 @@ CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& me
54 buffer.commit = memory_manager.Commit(buffer.handle, false); 54 buffer.commit = memory_manager.Commit(buffer.handle, false);
55} 55}
56 56
57CachedBufferBlock::~CachedBufferBlock() = default; 57Buffer::~Buffer() = default;
58 58
59VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, 59void Buffer::Upload(std::size_t offset, std::size_t size, const u8* data) const {
60 const VKDevice& device, VKMemoryManager& memory_manager,
61 VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
62 : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
63 CreateStreamBuffer(device,
64 scheduler)},
65 device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
66 staging_pool} {}
67
68VKBufferCache::~VKBufferCache() = default;
69
70Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
71 return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
72}
73
74VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) {
75 return buffer->GetHandle();
76}
77
78VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) {
79 size = std::max(size, std::size_t(4));
80 const auto& empty = staging_pool.GetUnusedBuffer(size, false);
81 scheduler.RequestOutsideRenderPassOperationContext();
82 scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
83 cmdbuf.FillBuffer(buffer, 0, size, 0);
84 });
85 return *empty.handle;
86}
87
88void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
89 const u8* data) {
90 const auto& staging = staging_pool.GetUnusedBuffer(size, true); 60 const auto& staging = staging_pool.GetUnusedBuffer(size, true);
91 std::memcpy(staging.commit->Map(size), data, size); 61 std::memcpy(staging.commit->Map(size), data, size);
92 62
93 scheduler.RequestOutsideRenderPassOperationContext(); 63 scheduler.RequestOutsideRenderPassOperationContext();
94 scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset, 64
95 size](vk::CommandBuffer cmdbuf) { 65 const VkBuffer handle = Handle();
96 cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); 66 scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
67 cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, size});
97 68
98 VkBufferMemoryBarrier barrier; 69 VkBufferMemoryBarrier barrier;
99 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; 70 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
@@ -102,7 +73,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
102 barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS; 73 barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS;
103 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 74 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
104 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 75 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
105 barrier.buffer = buffer; 76 barrier.buffer = handle;
106 barrier.offset = offset; 77 barrier.offset = offset;
107 barrier.size = size; 78 barrier.size = size;
108 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, 79 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
@@ -110,12 +81,12 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
110 }); 81 });
111} 82}
112 83
113void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, 84void Buffer::Download(std::size_t offset, std::size_t size, u8* data) const {
114 u8* data) {
115 const auto& staging = staging_pool.GetUnusedBuffer(size, true); 85 const auto& staging = staging_pool.GetUnusedBuffer(size, true);
116 scheduler.RequestOutsideRenderPassOperationContext(); 86 scheduler.RequestOutsideRenderPassOperationContext();
117 scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset, 87
118 size](vk::CommandBuffer cmdbuf) { 88 const VkBuffer handle = Handle();
89 scheduler.Record([staging = *staging.handle, handle, offset, size](vk::CommandBuffer cmdbuf) {
119 VkBufferMemoryBarrier barrier; 90 VkBufferMemoryBarrier barrier;
120 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; 91 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
121 barrier.pNext = nullptr; 92 barrier.pNext = nullptr;
@@ -123,7 +94,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
123 barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; 94 barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
124 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 95 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
125 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; 96 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
126 barrier.buffer = buffer; 97 barrier.buffer = handle;
127 barrier.offset = offset; 98 barrier.offset = offset;
128 barrier.size = size; 99 barrier.size = size;
129 100
@@ -131,18 +102,20 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
131 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | 102 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
132 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 103 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
133 VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); 104 VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
134 cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size}); 105 cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, size});
135 }); 106 });
136 scheduler.Finish(); 107 scheduler.Finish();
137 108
138 std::memcpy(data, staging.commit->Map(size), size); 109 std::memcpy(data, staging.commit->Map(size), size);
139} 110}
140 111
141void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, 112void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
142 std::size_t dst_offset, std::size_t size) { 113 std::size_t size) const {
143 scheduler.RequestOutsideRenderPassOperationContext(); 114 scheduler.RequestOutsideRenderPassOperationContext();
144 scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset, 115
145 dst_offset, size](vk::CommandBuffer cmdbuf) { 116 const VkBuffer dst_buffer = Handle();
117 scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset,
118 size](vk::CommandBuffer cmdbuf) {
146 cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); 119 cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});
147 120
148 std::array<VkBufferMemoryBarrier, 2> barriers; 121 std::array<VkBufferMemoryBarrier, 2> barriers;
@@ -169,4 +142,30 @@ void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t
169 }); 142 });
170} 143}
171 144
145VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
146 const VKDevice& device, VKMemoryManager& memory_manager,
147 VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
148 : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
149 CreateStreamBuffer(device,
150 scheduler)},
151 device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
152 staging_pool} {}
153
154VKBufferCache::~VKBufferCache() = default;
155
156std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
157 return std::make_shared<Buffer>(device, memory_manager, scheduler, staging_pool, cpu_addr,
158 size);
159}
160
161VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
162 size = std::max(size, std::size_t(4));
163 const auto& empty = staging_pool.GetUnusedBuffer(size, false);
164 scheduler.RequestOutsideRenderPassOperationContext();
165 scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
166 cmdbuf.FillBuffer(buffer, 0, size, 0);
167 });
168 return {*empty.handle, 0, 0};
169}
170
172} // namespace Vulkan 171} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index a54583e7d..3630aca77 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -8,7 +8,6 @@
8 8
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/buffer_cache/buffer_cache.h" 10#include "video_core/buffer_cache/buffer_cache.h"
11#include "video_core/rasterizer_cache.h"
12#include "video_core/renderer_vulkan/vk_memory_manager.h" 11#include "video_core/renderer_vulkan/vk_memory_manager.h"
13#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 12#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
14#include "video_core/renderer_vulkan/vk_stream_buffer.h" 13#include "video_core/renderer_vulkan/vk_stream_buffer.h"
@@ -24,22 +23,34 @@ class VKDevice;
24class VKMemoryManager; 23class VKMemoryManager;
25class VKScheduler; 24class VKScheduler;
26 25
27class CachedBufferBlock final : public VideoCommon::BufferBlock { 26class Buffer final : public VideoCommon::BufferBlock {
28public: 27public:
29 explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, 28 explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler,
30 VAddr cpu_addr, std::size_t size); 29 VKStagingBufferPool& staging_pool, VAddr cpu_addr, std::size_t size);
31 ~CachedBufferBlock(); 30 ~Buffer();
32 31
33 VkBuffer GetHandle() const { 32 void Upload(std::size_t offset, std::size_t size, const u8* data) const;
33
34 void Download(std::size_t offset, std::size_t size, u8* data) const;
35
36 void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset,
37 std::size_t size) const;
38
39 VkBuffer Handle() const {
34 return *buffer.handle; 40 return *buffer.handle;
35 } 41 }
36 42
43 u64 Address() const {
44 return 0;
45 }
46
37private: 47private:
48 VKScheduler& scheduler;
49 VKStagingBufferPool& staging_pool;
50
38 VKBuffer buffer; 51 VKBuffer buffer;
39}; 52};
40 53
41using Buffer = std::shared_ptr<CachedBufferBlock>;
42
43class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { 54class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
44public: 55public:
45 explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, 56 explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
@@ -47,21 +58,10 @@ public:
47 VKScheduler& scheduler, VKStagingBufferPool& staging_pool); 58 VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
48 ~VKBufferCache(); 59 ~VKBufferCache();
49 60
50 VkBuffer GetEmptyBuffer(std::size_t size) override; 61 BufferInfo GetEmptyBuffer(std::size_t size) override;
51 62
52protected: 63protected:
53 VkBuffer ToHandle(const Buffer& buffer) override; 64 std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override;
54
55 Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
56
57 void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
58 const u8* data) override;
59
60 void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
61 u8* data) override;
62
63 void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
64 std::size_t dst_offset, std::size_t size) override;
65 65
66private: 66private:
67 const VKDevice& device; 67 const VKDevice& device;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 8e1b46277..281bf9ac3 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -53,8 +53,9 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
53 }; 53 };
54 add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); 54 add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
55 add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); 55 add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
56 add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.texel_buffers.size()); 56 add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size());
57 add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); 57 add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
58 add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
58 add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); 59 add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
59 60
60 VkDescriptorSetLayoutCreateInfo ci; 61 VkDescriptorSetLayoutCreateInfo ci;
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index 890fd52cf..9259b618d 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -42,6 +42,7 @@ vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() {
42 {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60}, 42 {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60},
43 {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, 43 {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
44 {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, 44 {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
45 {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64},
45 {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}}; 46 {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}};
46 47
47 VkDescriptorPoolCreateInfo ci; 48 VkDescriptorPoolCreateInfo ci;
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 750e5a0ca..9fd8ac3f6 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -73,76 +73,79 @@ VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType
73 73
74std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( 74std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
75 vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) { 75 vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) {
76 static constexpr std::array formats{VK_FORMAT_A8B8G8R8_UNORM_PACK32, 76 static constexpr std::array formats{
77 VK_FORMAT_A8B8G8R8_UINT_PACK32, 77 VK_FORMAT_A8B8G8R8_UNORM_PACK32,
78 VK_FORMAT_A8B8G8R8_SNORM_PACK32, 78 VK_FORMAT_A8B8G8R8_UINT_PACK32,
79 VK_FORMAT_A8B8G8R8_SRGB_PACK32, 79 VK_FORMAT_A8B8G8R8_SNORM_PACK32,
80 VK_FORMAT_B5G6R5_UNORM_PACK16, 80 VK_FORMAT_A8B8G8R8_SRGB_PACK32,
81 VK_FORMAT_A2B10G10R10_UNORM_PACK32, 81 VK_FORMAT_B5G6R5_UNORM_PACK16,
82 VK_FORMAT_A1R5G5B5_UNORM_PACK16, 82 VK_FORMAT_A2B10G10R10_UNORM_PACK32,
83 VK_FORMAT_R32G32B32A32_SFLOAT, 83 VK_FORMAT_A1R5G5B5_UNORM_PACK16,
84 VK_FORMAT_R32G32B32A32_UINT, 84 VK_FORMAT_R32G32B32A32_SFLOAT,
85 VK_FORMAT_R32G32_SFLOAT, 85 VK_FORMAT_R32G32B32A32_UINT,
86 VK_FORMAT_R32G32_UINT, 86 VK_FORMAT_R32G32_SFLOAT,
87 VK_FORMAT_R16G16B16A16_UINT, 87 VK_FORMAT_R32G32_UINT,
88 VK_FORMAT_R16G16B16A16_SNORM, 88 VK_FORMAT_R16G16B16A16_UINT,
89 VK_FORMAT_R16G16B16A16_UNORM, 89 VK_FORMAT_R16G16B16A16_SNORM,
90 VK_FORMAT_R16G16_UNORM, 90 VK_FORMAT_R16G16B16A16_UNORM,
91 VK_FORMAT_R16G16_SNORM, 91 VK_FORMAT_R16G16_UNORM,
92 VK_FORMAT_R16G16_SFLOAT, 92 VK_FORMAT_R16G16_SNORM,
93 VK_FORMAT_R16_UNORM, 93 VK_FORMAT_R16G16_SFLOAT,
94 VK_FORMAT_R8G8B8A8_SRGB, 94 VK_FORMAT_R16_UNORM,
95 VK_FORMAT_R8G8_UNORM, 95 VK_FORMAT_R16_UINT,
96 VK_FORMAT_R8G8_SNORM, 96 VK_FORMAT_R8G8B8A8_SRGB,
97 VK_FORMAT_R8G8_UINT, 97 VK_FORMAT_R8G8_UNORM,
98 VK_FORMAT_R8_UNORM, 98 VK_FORMAT_R8G8_SNORM,
99 VK_FORMAT_R8_UINT, 99 VK_FORMAT_R8G8_UINT,
100 VK_FORMAT_B10G11R11_UFLOAT_PACK32, 100 VK_FORMAT_R8_UNORM,
101 VK_FORMAT_R32_SFLOAT, 101 VK_FORMAT_R8_UINT,
102 VK_FORMAT_R32_UINT, 102 VK_FORMAT_B10G11R11_UFLOAT_PACK32,
103 VK_FORMAT_R32_SINT, 103 VK_FORMAT_R32_SFLOAT,
104 VK_FORMAT_R16_SFLOAT, 104 VK_FORMAT_R32_UINT,
105 VK_FORMAT_R16G16B16A16_SFLOAT, 105 VK_FORMAT_R32_SINT,
106 VK_FORMAT_B8G8R8A8_UNORM, 106 VK_FORMAT_R16_SFLOAT,
107 VK_FORMAT_B8G8R8A8_SRGB, 107 VK_FORMAT_R16G16B16A16_SFLOAT,
108 VK_FORMAT_R4G4B4A4_UNORM_PACK16, 108 VK_FORMAT_B8G8R8A8_UNORM,
109 VK_FORMAT_D32_SFLOAT, 109 VK_FORMAT_B8G8R8A8_SRGB,
110 VK_FORMAT_D16_UNORM, 110 VK_FORMAT_R4G4B4A4_UNORM_PACK16,
111 VK_FORMAT_D16_UNORM_S8_UINT, 111 VK_FORMAT_D32_SFLOAT,
112 VK_FORMAT_D24_UNORM_S8_UINT, 112 VK_FORMAT_D16_UNORM,
113 VK_FORMAT_D32_SFLOAT_S8_UINT, 113 VK_FORMAT_D16_UNORM_S8_UINT,
114 VK_FORMAT_BC1_RGBA_UNORM_BLOCK, 114 VK_FORMAT_D24_UNORM_S8_UINT,
115 VK_FORMAT_BC2_UNORM_BLOCK, 115 VK_FORMAT_D32_SFLOAT_S8_UINT,
116 VK_FORMAT_BC3_UNORM_BLOCK, 116 VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
117 VK_FORMAT_BC4_UNORM_BLOCK, 117 VK_FORMAT_BC2_UNORM_BLOCK,
118 VK_FORMAT_BC5_UNORM_BLOCK, 118 VK_FORMAT_BC3_UNORM_BLOCK,
119 VK_FORMAT_BC5_SNORM_BLOCK, 119 VK_FORMAT_BC4_UNORM_BLOCK,
120 VK_FORMAT_BC7_UNORM_BLOCK, 120 VK_FORMAT_BC5_UNORM_BLOCK,
121 VK_FORMAT_BC6H_UFLOAT_BLOCK, 121 VK_FORMAT_BC5_SNORM_BLOCK,
122 VK_FORMAT_BC6H_SFLOAT_BLOCK, 122 VK_FORMAT_BC7_UNORM_BLOCK,
123 VK_FORMAT_BC1_RGBA_SRGB_BLOCK, 123 VK_FORMAT_BC6H_UFLOAT_BLOCK,
124 VK_FORMAT_BC2_SRGB_BLOCK, 124 VK_FORMAT_BC6H_SFLOAT_BLOCK,
125 VK_FORMAT_BC3_SRGB_BLOCK, 125 VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
126 VK_FORMAT_BC7_SRGB_BLOCK, 126 VK_FORMAT_BC2_SRGB_BLOCK,
127 VK_FORMAT_ASTC_4x4_SRGB_BLOCK, 127 VK_FORMAT_BC3_SRGB_BLOCK,
128 VK_FORMAT_ASTC_8x8_SRGB_BLOCK, 128 VK_FORMAT_BC7_SRGB_BLOCK,
129 VK_FORMAT_ASTC_8x5_SRGB_BLOCK, 129 VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
130 VK_FORMAT_ASTC_5x4_SRGB_BLOCK, 130 VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
131 VK_FORMAT_ASTC_5x5_UNORM_BLOCK, 131 VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
132 VK_FORMAT_ASTC_5x5_SRGB_BLOCK, 132 VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
133 VK_FORMAT_ASTC_10x8_UNORM_BLOCK, 133 VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
134 VK_FORMAT_ASTC_10x8_SRGB_BLOCK, 134 VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
135 VK_FORMAT_ASTC_6x6_UNORM_BLOCK, 135 VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
136 VK_FORMAT_ASTC_6x6_SRGB_BLOCK, 136 VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
137 VK_FORMAT_ASTC_10x10_UNORM_BLOCK, 137 VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
138 VK_FORMAT_ASTC_10x10_SRGB_BLOCK, 138 VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
139 VK_FORMAT_ASTC_12x12_UNORM_BLOCK, 139 VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
140 VK_FORMAT_ASTC_12x12_SRGB_BLOCK, 140 VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
141 VK_FORMAT_ASTC_8x6_UNORM_BLOCK, 141 VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
142 VK_FORMAT_ASTC_8x6_SRGB_BLOCK, 142 VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
143 VK_FORMAT_ASTC_6x5_UNORM_BLOCK, 143 VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
144 VK_FORMAT_ASTC_6x5_SRGB_BLOCK, 144 VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
145 VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}; 145 VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
146 VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
147 VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
148 };
146 std::unordered_map<VkFormat, VkFormatProperties> format_properties; 149 std::unordered_map<VkFormat, VkFormatProperties> format_properties;
147 for (const auto format : formats) { 150 for (const auto format : formats) {
148 format_properties.emplace(format, physical.GetFormatProperties(format)); 151 format_properties.emplace(format, physical.GetFormatProperties(format));
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index a5c7b7945..ea66e621e 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -27,6 +27,7 @@
27#include "video_core/renderer_vulkan/wrapper.h" 27#include "video_core/renderer_vulkan/wrapper.h"
28#include "video_core/shader/compiler_settings.h" 28#include "video_core/shader/compiler_settings.h"
29#include "video_core/shader/memory_util.h" 29#include "video_core/shader/memory_util.h"
30#include "video_core/shader_cache.h"
30 31
31namespace Vulkan { 32namespace Vulkan {
32 33
@@ -45,6 +46,7 @@ constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
45constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; 46constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
46constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; 47constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
47constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; 48constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
49constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
48constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; 50constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
49 51
50constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ 52constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
@@ -104,8 +106,9 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
104 u32 binding = base_binding; 106 u32 binding = base_binding;
105 AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers); 107 AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers);
106 AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers); 108 AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers);
107 AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.texel_buffers); 109 AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels);
108 AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers); 110 AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers);
111 AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels);
109 AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images); 112 AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images);
110 return binding; 113 return binding;
111} 114}
@@ -130,19 +133,18 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
130 return std::memcmp(&rhs, this, sizeof *this) == 0; 133 return std::memcmp(&rhs, this, sizeof *this) == 0;
131} 134}
132 135
133CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, 136Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
134 GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code, 137 VideoCommon::Shader::ProgramCode program_code, u32 main_offset)
135 u32 main_offset) 138 : gpu_addr{gpu_addr}, program_code{std::move(program_code)},
136 : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
137 registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, 139 registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
138 compiler_settings, registry}, 140 compiler_settings, registry},
139 entries{GenerateShaderEntries(shader_ir)} {} 141 entries{GenerateShaderEntries(shader_ir)} {}
140 142
141CachedShader::~CachedShader() = default; 143Shader::~Shader() = default;
142 144
143Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine( 145Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system,
144 Core::System& system, Tegra::Engines::ShaderType stage) { 146 Tegra::Engines::ShaderType stage) {
145 if (stage == Tegra::Engines::ShaderType::Compute) { 147 if (stage == ShaderType::Compute) {
146 return system.GPU().KeplerCompute(); 148 return system.GPU().KeplerCompute();
147 } else { 149 } else {
148 return system.GPU().Maxwell3D(); 150 return system.GPU().Maxwell3D();
@@ -154,16 +156,16 @@ VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasteri
154 VKDescriptorPool& descriptor_pool, 156 VKDescriptorPool& descriptor_pool,
155 VKUpdateDescriptorQueue& update_descriptor_queue, 157 VKUpdateDescriptorQueue& update_descriptor_queue,
156 VKRenderPassCache& renderpass_cache) 158 VKRenderPassCache& renderpass_cache)
157 : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, 159 : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device},
158 descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, 160 scheduler{scheduler}, descriptor_pool{descriptor_pool},
159 renderpass_cache{renderpass_cache} {} 161 update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {}
160 162
161VKPipelineCache::~VKPipelineCache() = default; 163VKPipelineCache::~VKPipelineCache() = default;
162 164
163std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { 165std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
164 const auto& gpu = system.GPU().Maxwell3D(); 166 const auto& gpu = system.GPU().Maxwell3D();
165 167
166 std::array<Shader, Maxwell::MaxShaderProgram> shaders; 168 std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
167 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 169 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
168 const auto program{static_cast<Maxwell::ShaderProgram>(index)}; 170 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
169 171
@@ -176,24 +178,28 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
176 const GPUVAddr program_addr{GetShaderAddress(system, program)}; 178 const GPUVAddr program_addr{GetShaderAddress(system, program)};
177 const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); 179 const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
178 ASSERT(cpu_addr); 180 ASSERT(cpu_addr);
179 auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader; 181
180 if (!shader) { 182 Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
183 if (!result) {
181 const auto host_ptr{memory_manager.GetPointer(program_addr)}; 184 const auto host_ptr{memory_manager.GetPointer(program_addr)};
182 185
183 // No shader found - create a new one 186 // No shader found - create a new one
184 constexpr u32 stage_offset = STAGE_MAIN_OFFSET; 187 constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
185 const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); 188 const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
186 ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false); 189 ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
190 const std::size_t size_in_bytes = code.size() * sizeof(u64);
191
192 auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code),
193 stage_offset);
194 result = shader.get();
187 195
188 shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
189 std::move(code), stage_offset);
190 if (cpu_addr) { 196 if (cpu_addr) {
191 Register(shader); 197 Register(std::move(shader), *cpu_addr, size_in_bytes);
192 } else { 198 } else {
193 null_shader = shader; 199 null_shader = std::move(shader);
194 } 200 }
195 } 201 }
196 shaders[index] = std::move(shader); 202 shaders[index] = result;
197 } 203 }
198 return last_shaders = shaders; 204 return last_shaders = shaders;
199} 205}
@@ -234,19 +240,22 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
234 const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); 240 const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
235 ASSERT(cpu_addr); 241 ASSERT(cpu_addr);
236 242
237 auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel; 243 Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
238 if (!shader) { 244 if (!shader) {
239 // No shader found - create a new one 245 // No shader found - create a new one
240 const auto host_ptr = memory_manager.GetPointer(program_addr); 246 const auto host_ptr = memory_manager.GetPointer(program_addr);
241 247
242 ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true); 248 ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
243 shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, 249 const std::size_t size_in_bytes = code.size() * sizeof(u64);
244 program_addr, *cpu_addr, std::move(code), 250
245 KERNEL_MAIN_OFFSET); 251 auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr,
252 std::move(code), KERNEL_MAIN_OFFSET);
253 shader = shader_info.get();
254
246 if (cpu_addr) { 255 if (cpu_addr) {
247 Register(shader); 256 Register(std::move(shader_info), *cpu_addr, size_in_bytes);
248 } else { 257 } else {
249 null_kernel = shader; 258 null_kernel = std::move(shader_info);
250 } 259 }
251 } 260 }
252 261
@@ -262,7 +271,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
262 return *entry; 271 return *entry;
263} 272}
264 273
265void VKPipelineCache::Unregister(const Shader& shader) { 274void VKPipelineCache::OnShaderRemoval(Shader* shader) {
266 bool finished = false; 275 bool finished = false;
267 const auto Finish = [&] { 276 const auto Finish = [&] {
268 // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and 277 // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
@@ -294,8 +303,6 @@ void VKPipelineCache::Unregister(const Shader& shader) {
294 Finish(); 303 Finish();
295 it = compute_cache.erase(it); 304 it = compute_cache.erase(it);
296 } 305 }
297
298 RasterizerCache::Unregister(shader);
299} 306}
300 307
301std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> 308std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
@@ -312,7 +319,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
312 ASSERT(point_size != 0.0f); 319 ASSERT(point_size != 0.0f);
313 } 320 }
314 for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { 321 for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
315 specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type(); 322 const auto& attribute = fixed_state.vertex_input.attributes[i];
323 specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
324 specialization.attribute_types[i] = attribute.Type();
316 } 325 }
317 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; 326 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
318 327
@@ -328,12 +337,11 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
328 } 337 }
329 338
330 const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); 339 const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
331 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); 340 const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
332 const auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader; 341 Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
333 ASSERT(shader);
334 342
335 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 343 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
336 const auto program_type = GetShaderType(program_enum); 344 const ShaderType program_type = GetShaderType(program_enum);
337 const auto& entries = shader->GetEntries(); 345 const auto& entries = shader->GetEntries();
338 program[stage] = { 346 program[stage] = {
339 Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), 347 Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
@@ -375,16 +383,17 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3
375 return; 383 return;
376 } 384 }
377 385
378 if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER) { 386 if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER ||
379 // Nvidia has a bug where updating multiple uniform texels at once causes the driver to 387 descriptor_type == STORAGE_TEXEL_BUFFER) {
380 // crash. 388 // Nvidia has a bug where updating multiple texels at once causes the driver to crash.
389 // Note: Fixed in driver Windows 443.24, Linux 440.66.15
381 for (u32 i = 0; i < count; ++i) { 390 for (u32 i = 0; i < count; ++i) {
382 VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); 391 VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
383 entry.dstBinding = binding + i; 392 entry.dstBinding = binding + i;
384 entry.dstArrayElement = 0; 393 entry.dstArrayElement = 0;
385 entry.descriptorCount = 1; 394 entry.descriptorCount = 1;
386 entry.descriptorType = descriptor_type; 395 entry.descriptorType = descriptor_type;
387 entry.offset = offset + i * entry_size; 396 entry.offset = static_cast<std::size_t>(offset + i * entry_size);
388 entry.stride = entry_size; 397 entry.stride = entry_size;
389 } 398 }
390 } else if (count > 0) { 399 } else if (count > 0) {
@@ -405,8 +414,9 @@ void FillDescriptorUpdateTemplateEntries(
405 std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { 414 std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) {
406 AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); 415 AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers);
407 AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); 416 AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers);
408 AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.texel_buffers); 417 AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels);
409 AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); 418 AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers);
419 AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels);
410 AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); 420 AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images);
411} 421}
412 422
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 0b5796fef..0a36e5112 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -17,7 +17,6 @@
17#include "common/common_types.h" 17#include "common/common_types.h"
18#include "video_core/engines/const_buffer_engine_interface.h" 18#include "video_core/engines/const_buffer_engine_interface.h"
19#include "video_core/engines/maxwell_3d.h" 19#include "video_core/engines/maxwell_3d.h"
20#include "video_core/rasterizer_cache.h"
21#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 20#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
22#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 21#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
23#include "video_core/renderer_vulkan/vk_renderpass_cache.h" 22#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
@@ -26,6 +25,7 @@
26#include "video_core/shader/memory_util.h" 25#include "video_core/shader/memory_util.h"
27#include "video_core/shader/registry.h" 26#include "video_core/shader/registry.h"
28#include "video_core/shader/shader_ir.h" 27#include "video_core/shader/shader_ir.h"
28#include "video_core/shader_cache.h"
29 29
30namespace Core { 30namespace Core {
31class System; 31class System;
@@ -41,8 +41,6 @@ class VKFence;
41class VKScheduler; 41class VKScheduler;
42class VKUpdateDescriptorQueue; 42class VKUpdateDescriptorQueue;
43 43
44class CachedShader;
45using Shader = std::shared_ptr<CachedShader>;
46using Maxwell = Tegra::Engines::Maxwell3D::Regs; 44using Maxwell = Tegra::Engines::Maxwell3D::Regs;
47 45
48struct GraphicsPipelineCacheKey { 46struct GraphicsPipelineCacheKey {
@@ -102,21 +100,16 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
102 100
103namespace Vulkan { 101namespace Vulkan {
104 102
105class CachedShader final : public RasterizerCacheObject { 103class Shader {
106public: 104public:
107 explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, 105 explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
108 VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code, 106 VideoCommon::Shader::ProgramCode program_code, u32 main_offset);
109 u32 main_offset); 107 ~Shader();
110 ~CachedShader();
111 108
112 GPUVAddr GetGpuAddr() const { 109 GPUVAddr GetGpuAddr() const {
113 return gpu_addr; 110 return gpu_addr;
114 } 111 }
115 112
116 std::size_t GetSizeInBytes() const override {
117 return program_code.size() * sizeof(u64);
118 }
119
120 VideoCommon::Shader::ShaderIR& GetIR() { 113 VideoCommon::Shader::ShaderIR& GetIR() {
121 return shader_ir; 114 return shader_ir;
122 } 115 }
@@ -144,25 +137,23 @@ private:
144 ShaderEntries entries; 137 ShaderEntries entries;
145}; 138};
146 139
147class VKPipelineCache final : public RasterizerCache<Shader> { 140class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
148public: 141public:
149 explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, 142 explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
150 const VKDevice& device, VKScheduler& scheduler, 143 const VKDevice& device, VKScheduler& scheduler,
151 VKDescriptorPool& descriptor_pool, 144 VKDescriptorPool& descriptor_pool,
152 VKUpdateDescriptorQueue& update_descriptor_queue, 145 VKUpdateDescriptorQueue& update_descriptor_queue,
153 VKRenderPassCache& renderpass_cache); 146 VKRenderPassCache& renderpass_cache);
154 ~VKPipelineCache(); 147 ~VKPipelineCache() override;
155 148
156 std::array<Shader, Maxwell::MaxShaderProgram> GetShaders(); 149 std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
157 150
158 VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key); 151 VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
159 152
160 VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); 153 VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
161 154
162protected: 155protected:
163 void Unregister(const Shader& shader) override; 156 void OnShaderRemoval(Shader* shader) final;
164
165 void FlushObjectInner(const Shader& object) override {}
166 157
167private: 158private:
168 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( 159 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
@@ -175,10 +166,10 @@ private:
175 VKUpdateDescriptorQueue& update_descriptor_queue; 166 VKUpdateDescriptorQueue& update_descriptor_queue;
176 VKRenderPassCache& renderpass_cache; 167 VKRenderPassCache& renderpass_cache;
177 168
178 Shader null_shader{}; 169 std::unique_ptr<Shader> null_shader;
179 Shader null_kernel{}; 170 std::unique_ptr<Shader> null_kernel;
180 171
181 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 172 std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
182 173
183 GraphicsPipelineCacheKey last_graphics_key; 174 GraphicsPipelineCacheKey last_graphics_key;
184 VKGraphicsPipeline* last_graphics_pipeline = nullptr; 175 VKGraphicsPipeline* last_graphics_pipeline = nullptr;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index be5b77fae..a8d94eac3 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -38,6 +38,7 @@
38#include "video_core/renderer_vulkan/vk_texture_cache.h" 38#include "video_core/renderer_vulkan/vk_texture_cache.h"
39#include "video_core/renderer_vulkan/vk_update_descriptor.h" 39#include "video_core/renderer_vulkan/vk_update_descriptor.h"
40#include "video_core/renderer_vulkan/wrapper.h" 40#include "video_core/renderer_vulkan/wrapper.h"
41#include "video_core/shader_cache.h"
41 42
42namespace Vulkan { 43namespace Vulkan {
43 44
@@ -98,7 +99,7 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {
98} 99}
99 100
100std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( 101std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
101 const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { 102 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
102 std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; 103 std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
103 for (std::size_t i = 0; i < std::size(addresses); ++i) { 104 for (std::size_t i = 0; i < std::size(addresses); ++i) {
104 addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; 105 addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
@@ -117,6 +118,17 @@ template <typename Engine, typename Entry>
117Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, 118Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
118 std::size_t stage, std::size_t index = 0) { 119 std::size_t stage, std::size_t index = 0) {
119 const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); 120 const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
121 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
122 if (entry.is_separated) {
123 const u32 buffer_1 = entry.buffer;
124 const u32 buffer_2 = entry.secondary_buffer;
125 const u32 offset_1 = entry.offset;
126 const u32 offset_2 = entry.secondary_offset;
127 const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1);
128 const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2);
129 return engine.GetTextureInfo(handle_1 | handle_2);
130 }
131 }
120 if (entry.is_bindless) { 132 if (entry.is_bindless) {
121 const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); 133 const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset);
122 return engine.GetTextureInfo(tex_handle); 134 return engine.GetTextureInfo(tex_handle);
@@ -131,6 +143,49 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
131 } 143 }
132} 144}
133 145
146/// @brief Determine if an attachment to be updated has to preserve contents
147/// @param is_clear True when a clear is being executed
148/// @param regs 3D registers
149/// @return True when the contents have to be preserved
150bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) {
151 if (!is_clear) {
152 return true;
153 }
154 // First we have to make sure all clear masks are enabled.
155 if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B ||
156 !regs.clear_buffers.A) {
157 return true;
158 }
159 // If scissors are disabled, the whole screen is cleared
160 if (!regs.clear_flags.scissor) {
161 return false;
162 }
163 // Then we have to confirm scissor testing clears the whole image
164 const std::size_t index = regs.clear_buffers.RT;
165 const auto& scissor = regs.scissor_test[0];
166 return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width ||
167 scissor.max_y < regs.rt[index].height;
168}
169
170/// @brief Determine if an attachment to be updated has to preserve contents
171/// @param is_clear True when a clear is being executed
172/// @param regs 3D registers
173/// @return True when the contents have to be preserved
174bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) {
175 // If we are not clearing, the contents have to be preserved
176 if (!is_clear) {
177 return true;
178 }
179 // For depth stencil clears we only have to confirm scissor test covers the whole image
180 if (!regs.clear_flags.scissor) {
181 return false;
182 }
183 // Make sure the clear cover the whole image
184 const auto& scissor = regs.scissor_test[0];
185 return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width ||
186 scissor.max_y < regs.zeta_height;
187}
188
134} // Anonymous namespace 189} // Anonymous namespace
135 190
136class BufferBindings final { 191class BufferBindings final {
@@ -332,7 +387,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
332 387
333 buffer_cache.Unmap(); 388 buffer_cache.Unmap();
334 389
335 const Texceptions texceptions = UpdateAttachments(); 390 const Texceptions texceptions = UpdateAttachments(false);
336 SetupImageTransitions(texceptions, color_attachments, zeta_attachment); 391 SetupImageTransitions(texceptions, color_attachments, zeta_attachment);
337 392
338 key.renderpass_params = GetRenderPassParams(texceptions); 393 key.renderpass_params = GetRenderPassParams(texceptions);
@@ -388,7 +443,7 @@ void RasterizerVulkan::Clear() {
388 return; 443 return;
389 } 444 }
390 445
391 [[maybe_unused]] const auto texceptions = UpdateAttachments(); 446 [[maybe_unused]] const auto texceptions = UpdateAttachments(true);
392 DEBUG_ASSERT(texceptions.none()); 447 DEBUG_ASSERT(texceptions.none());
393 SetupImageTransitions(0, color_attachments, zeta_attachment); 448 SetupImageTransitions(0, color_attachments, zeta_attachment);
394 449
@@ -468,8 +523,9 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
468 const auto& entries = pipeline.GetEntries(); 523 const auto& entries = pipeline.GetEntries();
469 SetupComputeConstBuffers(entries); 524 SetupComputeConstBuffers(entries);
470 SetupComputeGlobalBuffers(entries); 525 SetupComputeGlobalBuffers(entries);
471 SetupComputeTexelBuffers(entries); 526 SetupComputeUniformTexels(entries);
472 SetupComputeTextures(entries); 527 SetupComputeTextures(entries);
528 SetupComputeStorageTexels(entries);
473 SetupComputeImages(entries); 529 SetupComputeImages(entries);
474 530
475 buffer_cache.Unmap(); 531 buffer_cache.Unmap();
@@ -664,9 +720,12 @@ void RasterizerVulkan::FlushWork() {
664 draw_counter = 0; 720 draw_counter = 0;
665} 721}
666 722
667RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { 723RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) {
668 MICROPROFILE_SCOPE(Vulkan_RenderTargets); 724 MICROPROFILE_SCOPE(Vulkan_RenderTargets);
669 auto& dirty = system.GPU().Maxwell3D().dirty.flags; 725 auto& maxwell3d = system.GPU().Maxwell3D();
726 auto& dirty = maxwell3d.dirty.flags;
727 auto& regs = maxwell3d.regs;
728
670 const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; 729 const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
671 dirty[VideoCommon::Dirty::RenderTargets] = false; 730 dirty[VideoCommon::Dirty::RenderTargets] = false;
672 731
@@ -675,7 +734,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
675 Texceptions texceptions; 734 Texceptions texceptions;
676 for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { 735 for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
677 if (update_rendertargets) { 736 if (update_rendertargets) {
678 color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); 737 const bool preserve_contents = HasToPreserveColorContents(is_clear, regs);
738 color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents);
679 } 739 }
680 if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { 740 if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
681 texceptions[rt] = true; 741 texceptions[rt] = true;
@@ -683,7 +743,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
683 } 743 }
684 744
685 if (update_rendertargets) { 745 if (update_rendertargets) {
686 zeta_attachment = texture_cache.GetDepthBufferSurface(true); 746 const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs);
747 zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents);
687 } 748 }
688 if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { 749 if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
689 texceptions[ZETA_TEXCEPTION_INDEX] = true; 750 texceptions[ZETA_TEXCEPTION_INDEX] = true;
@@ -715,7 +776,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
715 if (!view) { 776 if (!view) {
716 return false; 777 return false;
717 } 778 }
718 key.views.push_back(view->GetHandle()); 779 key.views.push_back(view->GetAttachment());
719 key.width = std::min(key.width, view->GetWidth()); 780 key.width = std::min(key.width, view->GetWidth());
720 key.height = std::min(key.height, view->GetHeight()); 781 key.height = std::min(key.height, view->GetHeight());
721 key.layers = std::min(key.layers, view->GetNumLayers()); 782 key.layers = std::min(key.layers, view->GetNumLayers());
@@ -775,20 +836,21 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
775} 836}
776 837
777void RasterizerVulkan::SetupShaderDescriptors( 838void RasterizerVulkan::SetupShaderDescriptors(
778 const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { 839 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
779 texture_cache.GuardSamplers(true); 840 texture_cache.GuardSamplers(true);
780 841
781 for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { 842 for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
782 // Skip VertexA stage 843 // Skip VertexA stage
783 const auto& shader = shaders[stage + 1]; 844 Shader* const shader = shaders[stage + 1];
784 if (!shader) { 845 if (!shader) {
785 continue; 846 continue;
786 } 847 }
787 const auto& entries = shader->GetEntries(); 848 const auto& entries = shader->GetEntries();
788 SetupGraphicsConstBuffers(entries, stage); 849 SetupGraphicsConstBuffers(entries, stage);
789 SetupGraphicsGlobalBuffers(entries, stage); 850 SetupGraphicsGlobalBuffers(entries, stage);
790 SetupGraphicsTexelBuffers(entries, stage); 851 SetupGraphicsUniformTexels(entries, stage);
791 SetupGraphicsTextures(entries, stage); 852 SetupGraphicsTextures(entries, stage);
853 SetupGraphicsStorageTexels(entries, stage);
792 SetupGraphicsImages(entries, stage); 854 SetupGraphicsImages(entries, stage);
793 } 855 }
794 texture_cache.GuardSamplers(false); 856 texture_cache.GuardSamplers(false);
@@ -838,6 +900,10 @@ void RasterizerVulkan::BeginTransformFeedback() {
838 if (regs.tfb_enabled == 0) { 900 if (regs.tfb_enabled == 0) {
839 return; 901 return;
840 } 902 }
903 if (!device.IsExtTransformFeedbackSupported()) {
904 LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
905 return;
906 }
841 907
842 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || 908 UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
843 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || 909 regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
@@ -852,10 +918,10 @@ void RasterizerVulkan::BeginTransformFeedback() {
852 UNIMPLEMENTED_IF(binding.buffer_offset != 0); 918 UNIMPLEMENTED_IF(binding.buffer_offset != 0);
853 919
854 const GPUVAddr gpu_addr = binding.Address(); 920 const GPUVAddr gpu_addr = binding.Address();
855 const std::size_t size = binding.buffer_size; 921 const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size);
856 const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); 922 const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
857 923
858 scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) { 924 scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) {
859 cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); 925 cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
860 cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); 926 cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
861 }); 927 });
@@ -866,6 +932,9 @@ void RasterizerVulkan::EndTransformFeedback() {
866 if (regs.tfb_enabled == 0) { 932 if (regs.tfb_enabled == 0) {
867 return; 933 return;
868 } 934 }
935 if (!device.IsExtTransformFeedbackSupported()) {
936 return;
937 }
869 938
870 scheduler.Record( 939 scheduler.Record(
871 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); 940 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
@@ -877,14 +946,10 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
877 946
878 for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { 947 for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
879 const auto& attrib = regs.vertex_attrib_format[index]; 948 const auto& attrib = regs.vertex_attrib_format[index];
880 if (!attrib.IsValid()) { 949 if (attrib.IsConstant()) {
881 vertex_input.SetAttribute(index, false, 0, 0, {}, {}); 950 vertex_input.SetAttribute(index, false, 0, 0, {}, {});
882 continue; 951 continue;
883 } 952 }
884
885 [[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer];
886 ASSERT(buffer.IsEnabled());
887
888 vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(), 953 vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(),
889 attrib.size.Value()); 954 attrib.size.Value());
890 } 955 }
@@ -908,8 +973,8 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
908 buffer_bindings.AddVertexBinding(DefaultBuffer(), 0); 973 buffer_bindings.AddVertexBinding(DefaultBuffer(), 0);
909 continue; 974 continue;
910 } 975 }
911 const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); 976 const auto info = buffer_cache.UploadMemory(start, size);
912 buffer_bindings.AddVertexBinding(buffer, offset); 977 buffer_bindings.AddVertexBinding(info.handle, info.offset);
913 } 978 }
914} 979}
915 980
@@ -931,7 +996,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
931 break; 996 break;
932 } 997 }
933 const GPUVAddr gpu_addr = regs.index_array.IndexStart(); 998 const GPUVAddr gpu_addr = regs.index_array.IndexStart();
934 auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); 999 const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
1000 VkBuffer buffer = info.handle;
1001 u64 offset = info.offset;
935 std::tie(buffer, offset) = quad_indexed_pass.Assemble( 1002 std::tie(buffer, offset) = quad_indexed_pass.Assemble(
936 regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); 1003 regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset);
937 1004
@@ -945,7 +1012,9 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
945 break; 1012 break;
946 } 1013 }
947 const GPUVAddr gpu_addr = regs.index_array.IndexStart(); 1014 const GPUVAddr gpu_addr = regs.index_array.IndexStart();
948 auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); 1015 const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize());
1016 VkBuffer buffer = info.handle;
1017 u64 offset = info.offset;
949 1018
950 auto format = regs.index_array.format; 1019 auto format = regs.index_array.format;
951 const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; 1020 const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
@@ -980,12 +1049,12 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries,
980 } 1049 }
981} 1050}
982 1051
983void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) { 1052void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) {
984 MICROPROFILE_SCOPE(Vulkan_Textures); 1053 MICROPROFILE_SCOPE(Vulkan_Textures);
985 const auto& gpu = system.GPU().Maxwell3D(); 1054 const auto& gpu = system.GPU().Maxwell3D();
986 for (const auto& entry : entries.texel_buffers) { 1055 for (const auto& entry : entries.uniform_texels) {
987 const auto image = GetTextureInfo(gpu, entry, stage).tic; 1056 const auto image = GetTextureInfo(gpu, entry, stage).tic;
988 SetupTexelBuffer(image, entry); 1057 SetupUniformTexels(image, entry);
989 } 1058 }
990} 1059}
991 1060
@@ -1000,6 +1069,15 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::
1000 } 1069 }
1001} 1070}
1002 1071
1072void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) {
1073 MICROPROFILE_SCOPE(Vulkan_Textures);
1074 const auto& gpu = system.GPU().Maxwell3D();
1075 for (const auto& entry : entries.storage_texels) {
1076 const auto image = GetTextureInfo(gpu, entry, stage).tic;
1077 SetupStorageTexel(image, entry);
1078 }
1079}
1080
1003void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { 1081void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
1004 MICROPROFILE_SCOPE(Vulkan_Images); 1082 MICROPROFILE_SCOPE(Vulkan_Images);
1005 const auto& gpu = system.GPU().Maxwell3D(); 1083 const auto& gpu = system.GPU().Maxwell3D();
@@ -1032,12 +1110,12 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
1032 } 1110 }
1033} 1111}
1034 1112
1035void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) { 1113void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
1036 MICROPROFILE_SCOPE(Vulkan_Textures); 1114 MICROPROFILE_SCOPE(Vulkan_Textures);
1037 const auto& gpu = system.GPU().KeplerCompute(); 1115 const auto& gpu = system.GPU().KeplerCompute();
1038 for (const auto& entry : entries.texel_buffers) { 1116 for (const auto& entry : entries.uniform_texels) {
1039 const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; 1117 const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
1040 SetupTexelBuffer(image, entry); 1118 SetupUniformTexels(image, entry);
1041 } 1119 }
1042} 1120}
1043 1121
@@ -1052,6 +1130,15 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
1052 } 1130 }
1053} 1131}
1054 1132
1133void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
1134 MICROPROFILE_SCOPE(Vulkan_Textures);
1135 const auto& gpu = system.GPU().KeplerCompute();
1136 for (const auto& entry : entries.storage_texels) {
1137 const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
1138 SetupStorageTexel(image, entry);
1139 }
1140}
1141
1055void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { 1142void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
1056 MICROPROFILE_SCOPE(Vulkan_Images); 1143 MICROPROFILE_SCOPE(Vulkan_Images);
1057 const auto& gpu = system.GPU().KeplerCompute(); 1144 const auto& gpu = system.GPU().KeplerCompute();
@@ -1074,10 +1161,9 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
1074 Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); 1161 Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float));
1075 ASSERT(size <= MaxConstbufferSize); 1162 ASSERT(size <= MaxConstbufferSize);
1076 1163
1077 const auto [buffer_handle, offset] = 1164 const auto info =
1078 buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); 1165 buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
1079 1166 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1080 update_descriptor_queue.AddBuffer(buffer_handle, offset, size);
1081} 1167}
1082 1168
1083void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { 1169void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
@@ -1091,18 +1177,18 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
1091 // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the 1177 // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the
1092 // default buffer. 1178 // default buffer.
1093 static constexpr std::size_t dummy_size = 4; 1179 static constexpr std::size_t dummy_size = 4;
1094 const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); 1180 const auto info = buffer_cache.GetEmptyBuffer(dummy_size);
1095 update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); 1181 update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size);
1096 return; 1182 return;
1097 } 1183 }
1098 1184
1099 const auto [buffer, offset] = buffer_cache.UploadMemory( 1185 const auto info = buffer_cache.UploadMemory(
1100 actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); 1186 actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten());
1101 update_descriptor_queue.AddBuffer(buffer, offset, size); 1187 update_descriptor_queue.AddBuffer(info.handle, info.offset, size);
1102} 1188}
1103 1189
1104void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic, 1190void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic,
1105 const TexelBufferEntry& entry) { 1191 const UniformTexelEntry& entry) {
1106 const auto view = texture_cache.GetTextureSurface(tic, entry); 1192 const auto view = texture_cache.GetTextureSurface(tic, entry);
1107 ASSERT(view->IsBufferView()); 1193 ASSERT(view->IsBufferView());
1108 1194
@@ -1114,16 +1200,24 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu
1114 auto view = texture_cache.GetTextureSurface(texture.tic, entry); 1200 auto view = texture_cache.GetTextureSurface(texture.tic, entry);
1115 ASSERT(!view->IsBufferView()); 1201 ASSERT(!view->IsBufferView());
1116 1202
1117 const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source, 1203 const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source,
1118 texture.tic.z_source, texture.tic.w_source); 1204 texture.tic.z_source, texture.tic.w_source);
1119 const auto sampler = sampler_cache.GetSampler(texture.tsc); 1205 const auto sampler = sampler_cache.GetSampler(texture.tsc);
1120 update_descriptor_queue.AddSampledImage(sampler, image_view); 1206 update_descriptor_queue.AddSampledImage(sampler, image_view);
1121 1207
1122 const auto image_layout = update_descriptor_queue.GetLastImageLayout(); 1208 VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
1123 *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; 1209 *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
1124 sampled_views.push_back(ImageView{std::move(view), image_layout}); 1210 sampled_views.push_back(ImageView{std::move(view), image_layout});
1125} 1211}
1126 1212
1213void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic,
1214 const StorageTexelEntry& entry) {
1215 const auto view = texture_cache.GetImageSurface(tic, entry);
1216 ASSERT(view->IsBufferView());
1217
1218 update_descriptor_queue.AddTexelBuffer(view->GetBufferView());
1219}
1220
1127void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) { 1221void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) {
1128 auto view = texture_cache.GetImageSurface(tic, entry); 1222 auto view = texture_cache.GetImageSurface(tic, entry);
1129 1223
@@ -1133,10 +1227,11 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima
1133 1227
1134 UNIMPLEMENTED_IF(tic.IsBuffer()); 1228 UNIMPLEMENTED_IF(tic.IsBuffer());
1135 1229
1136 const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source); 1230 const VkImageView image_view =
1231 view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
1137 update_descriptor_queue.AddImage(image_view); 1232 update_descriptor_queue.AddImage(image_view);
1138 1233
1139 const auto image_layout = update_descriptor_queue.GetLastImageLayout(); 1234 VkImageLayout* const image_layout = update_descriptor_queue.LastImageLayout();
1140 *image_layout = VK_IMAGE_LAYOUT_GENERAL; 1235 *image_layout = VK_IMAGE_LAYOUT_GENERAL;
1141 image_views.push_back(ImageView{std::move(view), image_layout}); 1236 image_views.push_back(ImageView{std::move(view), image_layout});
1142} 1237}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 0ed0e48c6..83e00e7e9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -159,7 +159,10 @@ private:
159 159
160 void FlushWork(); 160 void FlushWork();
161 161
162 Texceptions UpdateAttachments(); 162 /// @brief Updates the currently bound attachments
163 /// @param is_clear True when the framebuffer is updated as a clear
164 /// @return Bitfield of attachments being used as sampled textures
165 Texceptions UpdateAttachments(bool is_clear);
163 166
164 std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass); 167 std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass);
165 168
@@ -168,7 +171,7 @@ private:
168 bool is_indexed, bool is_instanced); 171 bool is_indexed, bool is_instanced);
169 172
170 /// Setup descriptors in the graphics pipeline. 173 /// Setup descriptors in the graphics pipeline.
171 void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders); 174 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
172 175
173 void SetupImageTransitions(Texceptions texceptions, 176 void SetupImageTransitions(Texceptions texceptions,
174 const std::array<View, Maxwell::NumRenderTargets>& color_attachments, 177 const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
@@ -193,12 +196,15 @@ private:
193 /// Setup global buffers in the graphics pipeline. 196 /// Setup global buffers in the graphics pipeline.
194 void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage); 197 void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
195 198
196 /// Setup texel buffers in the graphics pipeline. 199 /// Setup uniform texels in the graphics pipeline.
197 void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage); 200 void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
198 201
199 /// Setup textures in the graphics pipeline. 202 /// Setup textures in the graphics pipeline.
200 void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); 203 void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
201 204
205 /// Setup storage texels in the graphics pipeline.
206 void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage);
207
202 /// Setup images in the graphics pipeline. 208 /// Setup images in the graphics pipeline.
203 void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); 209 void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
204 210
@@ -209,11 +215,14 @@ private:
209 void SetupComputeGlobalBuffers(const ShaderEntries& entries); 215 void SetupComputeGlobalBuffers(const ShaderEntries& entries);
210 216
211 /// Setup texel buffers in the compute pipeline. 217 /// Setup texel buffers in the compute pipeline.
212 void SetupComputeTexelBuffers(const ShaderEntries& entries); 218 void SetupComputeUniformTexels(const ShaderEntries& entries);
213 219
214 /// Setup textures in the compute pipeline. 220 /// Setup textures in the compute pipeline.
215 void SetupComputeTextures(const ShaderEntries& entries); 221 void SetupComputeTextures(const ShaderEntries& entries);
216 222
223 /// Setup storage texels in the compute pipeline.
224 void SetupComputeStorageTexels(const ShaderEntries& entries);
225
217 /// Setup images in the compute pipeline. 226 /// Setup images in the compute pipeline.
218 void SetupComputeImages(const ShaderEntries& entries); 227 void SetupComputeImages(const ShaderEntries& entries);
219 228
@@ -222,10 +231,12 @@ private:
222 231
223 void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); 232 void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
224 233
225 void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry); 234 void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry);
226 235
227 void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry); 236 void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
228 237
238 void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry);
239
229 void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); 240 void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
230 241
231 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); 242 void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
index e6f2fa553..616eacc36 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -9,6 +9,8 @@
9#include "video_core/renderer_vulkan/wrapper.h" 9#include "video_core/renderer_vulkan/wrapper.h"
10#include "video_core/textures/texture.h" 10#include "video_core/textures/texture.h"
11 11
12using Tegra::Texture::TextureMipmapFilter;
13
12namespace Vulkan { 14namespace Vulkan {
13 15
14namespace { 16namespace {
@@ -63,8 +65,8 @@ vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) c
63 ci.maxAnisotropy = tsc.GetMaxAnisotropy(); 65 ci.maxAnisotropy = tsc.GetMaxAnisotropy();
64 ci.compareEnable = tsc.depth_compare_enabled; 66 ci.compareEnable = tsc.depth_compare_enabled;
65 ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func); 67 ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func);
66 ci.minLod = tsc.GetMinLod(); 68 ci.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod();
67 ci.maxLod = tsc.GetMaxLod(); 69 ci.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod();
68 ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color); 70 ci.borderColor = arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color);
69 ci.unnormalizedCoordinates = VK_FALSE; 71 ci.unnormalizedCoordinates = VK_FALSE;
70 return device.GetLogical().CreateSampler(ci); 72 return device.GetLogical().CreateSampler(ci);
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 6f6dedd82..97429cc59 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -400,8 +400,9 @@ private:
400 u32 binding = specialization.base_binding; 400 u32 binding = specialization.base_binding;
401 binding = DeclareConstantBuffers(binding); 401 binding = DeclareConstantBuffers(binding);
402 binding = DeclareGlobalBuffers(binding); 402 binding = DeclareGlobalBuffers(binding);
403 binding = DeclareTexelBuffers(binding); 403 binding = DeclareUniformTexels(binding);
404 binding = DeclareSamplers(binding); 404 binding = DeclareSamplers(binding);
405 binding = DeclareStorageTexels(binding);
405 binding = DeclareImages(binding); 406 binding = DeclareImages(binding);
406 407
407 const Id main = OpFunction(t_void, {}, TypeFunction(t_void)); 408 const Id main = OpFunction(t_void, {}, TypeFunction(t_void));
@@ -741,8 +742,10 @@ private:
741 if (!IsGenericAttribute(index)) { 742 if (!IsGenericAttribute(index)) {
742 continue; 743 continue;
743 } 744 }
744
745 const u32 location = GetGenericAttributeLocation(index); 745 const u32 location = GetGenericAttributeLocation(index);
746 if (!IsAttributeEnabled(location)) {
747 continue;
748 }
746 const auto type_descriptor = GetAttributeType(location); 749 const auto type_descriptor = GetAttributeType(location);
747 Id type; 750 Id type;
748 if (IsInputAttributeArray()) { 751 if (IsInputAttributeArray()) {
@@ -887,7 +890,7 @@ private:
887 return binding; 890 return binding;
888 } 891 }
889 892
890 u32 DeclareTexelBuffers(u32 binding) { 893 u32 DeclareUniformTexels(u32 binding) {
891 for (const auto& sampler : ir.GetSamplers()) { 894 for (const auto& sampler : ir.GetSamplers()) {
892 if (!sampler.is_buffer) { 895 if (!sampler.is_buffer) {
893 continue; 896 continue;
@@ -908,7 +911,7 @@ private:
908 Decorate(id, spv::Decoration::Binding, binding++); 911 Decorate(id, spv::Decoration::Binding, binding++);
909 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); 912 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
910 913
911 texel_buffers.emplace(sampler.index, TexelBuffer{image_type, id}); 914 uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id});
912 } 915 }
913 return binding; 916 return binding;
914 } 917 }
@@ -943,31 +946,48 @@ private:
943 return binding; 946 return binding;
944 } 947 }
945 948
946 u32 DeclareImages(u32 binding) { 949 u32 DeclareStorageTexels(u32 binding) {
947 for (const auto& image : ir.GetImages()) { 950 for (const auto& image : ir.GetImages()) {
948 const auto [dim, arrayed] = GetImageDim(image); 951 if (image.type != Tegra::Shader::ImageType::TextureBuffer) {
949 constexpr int depth = 0; 952 continue;
950 constexpr bool ms = false;
951 constexpr int sampled = 2; // This won't be accessed with a sampler
952 constexpr auto format = spv::ImageFormat::Unknown;
953 const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
954 const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
955 const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
956 AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
957
958 Decorate(id, spv::Decoration::Binding, binding++);
959 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
960 if (image.is_read && !image.is_written) {
961 Decorate(id, spv::Decoration::NonWritable);
962 } else if (image.is_written && !image.is_read) {
963 Decorate(id, spv::Decoration::NonReadable);
964 } 953 }
954 DeclareImage(image, binding);
955 }
956 return binding;
957 }
965 958
966 images.emplace(image.index, StorageImage{image_type, id}); 959 u32 DeclareImages(u32 binding) {
960 for (const auto& image : ir.GetImages()) {
961 if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
962 continue;
963 }
964 DeclareImage(image, binding);
967 } 965 }
968 return binding; 966 return binding;
969 } 967 }
970 968
969 void DeclareImage(const Image& image, u32& binding) {
970 const auto [dim, arrayed] = GetImageDim(image);
971 constexpr int depth = 0;
972 constexpr bool ms = false;
973 constexpr int sampled = 2; // This won't be accessed with a sampler
974 const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown;
975 const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {});
976 const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type);
977 const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
978 AddGlobalVariable(Name(id, fmt::format("image_{}", image.index)));
979
980 Decorate(id, spv::Decoration::Binding, binding++);
981 Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
982 if (image.is_read && !image.is_written) {
983 Decorate(id, spv::Decoration::NonWritable);
984 } else if (image.is_written && !image.is_read) {
985 Decorate(id, spv::Decoration::NonReadable);
986 }
987
988 images.emplace(image.index, StorageImage{image_type, id});
989 }
990
971 bool IsRenderTargetEnabled(u32 rt) const { 991 bool IsRenderTargetEnabled(u32 rt) const {
972 for (u32 component = 0; component < 4; ++component) { 992 for (u32 component = 0; component < 4; ++component) {
973 if (header.ps.IsColorComponentOutputEnabled(rt, component)) { 993 if (header.ps.IsColorComponentOutputEnabled(rt, component)) {
@@ -986,6 +1006,10 @@ private:
986 return stage == ShaderType::TesselationControl; 1006 return stage == ShaderType::TesselationControl;
987 } 1007 }
988 1008
1009 bool IsAttributeEnabled(u32 location) const {
1010 return stage != ShaderType::Vertex || specialization.enabled_attributes[location];
1011 }
1012
989 u32 GetNumInputVertices() const { 1013 u32 GetNumInputVertices() const {
990 switch (stage) { 1014 switch (stage) {
991 case ShaderType::Geometry: 1015 case ShaderType::Geometry:
@@ -1201,16 +1225,20 @@ private:
1201 UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); 1225 UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
1202 return {v_float_zero, Type::Float}; 1226 return {v_float_zero, Type::Float};
1203 default: 1227 default:
1204 if (IsGenericAttribute(attribute)) { 1228 if (!IsGenericAttribute(attribute)) {
1205 const u32 location = GetGenericAttributeLocation(attribute); 1229 break;
1206 const auto type_descriptor = GetAttributeType(location);
1207 const Type type = type_descriptor.type;
1208 const Id attribute_id = input_attributes.at(attribute);
1209 const std::vector elements = {element};
1210 const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
1211 return {OpLoad(GetTypeDefinition(type), pointer), type};
1212 } 1230 }
1213 break; 1231 const u32 location = GetGenericAttributeLocation(attribute);
1232 if (!IsAttributeEnabled(location)) {
1233 // Disabled attributes (also known as constant attributes) always return zero.
1234 return {v_float_zero, Type::Float};
1235 }
1236 const auto type_descriptor = GetAttributeType(location);
1237 const Type type = type_descriptor.type;
1238 const Id attribute_id = input_attributes.at(attribute);
1239 const std::vector elements = {element};
1240 const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
1241 return {OpLoad(GetTypeDefinition(type), pointer), type};
1214 } 1242 }
1215 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); 1243 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
1216 return {v_float_zero, Type::Float}; 1244 return {v_float_zero, Type::Float};
@@ -1246,7 +1274,7 @@ private:
1246 } else { 1274 } else {
1247 UNREACHABLE_MSG("Unmanaged offset node type"); 1275 UNREACHABLE_MSG("Unmanaged offset node type");
1248 } 1276 }
1249 pointer = OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), buffer_index, 1277 pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index,
1250 buffer_element); 1278 buffer_element);
1251 } 1279 }
1252 return {OpLoad(t_float, pointer), Type::Float}; 1280 return {OpLoad(t_float, pointer), Type::Float};
@@ -1601,7 +1629,7 @@ private:
1601 1629
1602 const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b); 1630 const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b);
1603 const Id carry = OpCompositeExtract(t_uint, result, 1); 1631 const Id carry = OpCompositeExtract(t_uint, result, 1);
1604 return {OpINotEqual(t_bool, carry, Constant(t_uint, 0)), Type::Bool}; 1632 return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool};
1605 } 1633 }
1606 1634
1607 Expression LogicalAssign(Operation operation) { 1635 Expression LogicalAssign(Operation operation) {
@@ -1664,7 +1692,7 @@ private:
1664 const auto& meta = std::get<MetaTexture>(operation.GetMeta()); 1692 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1665 const u32 index = meta.sampler.index; 1693 const u32 index = meta.sampler.index;
1666 if (meta.sampler.is_buffer) { 1694 if (meta.sampler.is_buffer) {
1667 const auto& entry = texel_buffers.at(index); 1695 const auto& entry = uniform_texels.at(index);
1668 return OpLoad(entry.image_type, entry.image); 1696 return OpLoad(entry.image_type, entry.image);
1669 } else { 1697 } else {
1670 const auto& entry = sampled_images.at(index); 1698 const auto& entry = sampled_images.at(index);
@@ -1941,39 +1969,20 @@ private:
1941 return {}; 1969 return {};
1942 } 1970 }
1943 1971
1944 Expression AtomicImageAdd(Operation operation) { 1972 template <Id (Module::*func)(Id, Id, Id, Id, Id)>
1945 UNIMPLEMENTED(); 1973 Expression AtomicImage(Operation operation) {
1946 return {}; 1974 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
1947 } 1975 ASSERT(meta.values.size() == 1);
1948
1949 Expression AtomicImageMin(Operation operation) {
1950 UNIMPLEMENTED();
1951 return {};
1952 }
1953
1954 Expression AtomicImageMax(Operation operation) {
1955 UNIMPLEMENTED();
1956 return {};
1957 }
1958
1959 Expression AtomicImageAnd(Operation operation) {
1960 UNIMPLEMENTED();
1961 return {};
1962 }
1963
1964 Expression AtomicImageOr(Operation operation) {
1965 UNIMPLEMENTED();
1966 return {};
1967 }
1968 1976
1969 Expression AtomicImageXor(Operation operation) { 1977 const Id coordinate = GetCoordinates(operation, Type::Int);
1970 UNIMPLEMENTED(); 1978 const Id image = images.at(meta.image.index).image;
1971 return {}; 1979 const Id sample = v_uint_zero;
1972 } 1980 const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample);
1973 1981
1974 Expression AtomicImageExchange(Operation operation) { 1982 const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
1975 UNIMPLEMENTED(); 1983 const Id semantics = v_uint_zero;
1976 return {}; 1984 const Id value = AsUint(Visit(meta.values[0]));
1985 return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
1977 } 1986 }
1978 1987
1979 template <Id (Module::*func)(Id, Id, Id, Id, Id)> 1988 template <Id (Module::*func)(Id, Id, Id, Id, Id)>
@@ -1988,7 +1997,7 @@ private:
1988 return {v_float_zero, Type::Float}; 1997 return {v_float_zero, Type::Float};
1989 } 1998 }
1990 const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); 1999 const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
1991 const Id semantics = Constant(t_uint, 0); 2000 const Id semantics = v_uint_zero;
1992 const Id value = AsUint(Visit(operation[1])); 2001 const Id value = AsUint(Visit(operation[1]));
1993 2002
1994 return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; 2003 return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
@@ -2612,11 +2621,11 @@ private:
2612 2621
2613 &SPIRVDecompiler::ImageLoad, 2622 &SPIRVDecompiler::ImageLoad,
2614 &SPIRVDecompiler::ImageStore, 2623 &SPIRVDecompiler::ImageStore,
2615 &SPIRVDecompiler::AtomicImageAdd, 2624 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>,
2616 &SPIRVDecompiler::AtomicImageAnd, 2625 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>,
2617 &SPIRVDecompiler::AtomicImageOr, 2626 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>,
2618 &SPIRVDecompiler::AtomicImageXor, 2627 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>,
2619 &SPIRVDecompiler::AtomicImageExchange, 2628 &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>,
2620 2629
2621 &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, 2630 &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
2622 &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, 2631 &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
@@ -2758,8 +2767,11 @@ private:
2758 Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); 2767 Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
2759 const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); 2768 const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
2760 2769
2770 const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint);
2771
2761 const Id v_float_zero = Constant(t_float, 0.0f); 2772 const Id v_float_zero = Constant(t_float, 0.0f);
2762 const Id v_float_one = Constant(t_float, 1.0f); 2773 const Id v_float_one = Constant(t_float, 1.0f);
2774 const Id v_uint_zero = Constant(t_uint, 0);
2763 2775
2764 // Nvidia uses these defaults for varyings (e.g. position and generic attributes) 2776 // Nvidia uses these defaults for varyings (e.g. position and generic attributes)
2765 const Id v_varying_default = 2777 const Id v_varying_default =
@@ -2784,15 +2796,16 @@ private:
2784 std::unordered_map<u8, GenericVaryingDescription> output_attributes; 2796 std::unordered_map<u8, GenericVaryingDescription> output_attributes;
2785 std::map<u32, Id> constant_buffers; 2797 std::map<u32, Id> constant_buffers;
2786 std::map<GlobalMemoryBase, Id> global_buffers; 2798 std::map<GlobalMemoryBase, Id> global_buffers;
2787 std::map<u32, TexelBuffer> texel_buffers; 2799 std::map<u32, TexelBuffer> uniform_texels;
2788 std::map<u32, SampledImage> sampled_images; 2800 std::map<u32, SampledImage> sampled_images;
2801 std::map<u32, TexelBuffer> storage_texels;
2789 std::map<u32, StorageImage> images; 2802 std::map<u32, StorageImage> images;
2790 2803
2804 std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
2791 Id instance_index{}; 2805 Id instance_index{};
2792 Id vertex_index{}; 2806 Id vertex_index{};
2793 Id base_instance{}; 2807 Id base_instance{};
2794 Id base_vertex{}; 2808 Id base_vertex{};
2795 std::array<Id, Maxwell::NumRenderTargets> frag_colors{};
2796 Id frag_depth{}; 2809 Id frag_depth{};
2797 Id frag_coord{}; 2810 Id frag_coord{};
2798 Id front_facing{}; 2811 Id front_facing{};
@@ -3048,13 +3061,17 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
3048 } 3061 }
3049 for (const auto& sampler : ir.GetSamplers()) { 3062 for (const auto& sampler : ir.GetSamplers()) {
3050 if (sampler.is_buffer) { 3063 if (sampler.is_buffer) {
3051 entries.texel_buffers.emplace_back(sampler); 3064 entries.uniform_texels.emplace_back(sampler);
3052 } else { 3065 } else {
3053 entries.samplers.emplace_back(sampler); 3066 entries.samplers.emplace_back(sampler);
3054 } 3067 }
3055 } 3068 }
3056 for (const auto& image : ir.GetImages()) { 3069 for (const auto& image : ir.GetImages()) {
3057 entries.images.emplace_back(image); 3070 if (image.type == Tegra::Shader::ImageType::TextureBuffer) {
3071 entries.storage_texels.emplace_back(image);
3072 } else {
3073 entries.images.emplace_back(image);
3074 }
3058 } 3075 }
3059 for (const auto& attribute : ir.GetInputAttributes()) { 3076 for (const auto& attribute : ir.GetInputAttributes()) {
3060 if (IsGenericAttribute(attribute)) { 3077 if (IsGenericAttribute(attribute)) {
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index f4c05ac3c..2b0e90396 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -21,8 +21,9 @@ class VKDevice;
21namespace Vulkan { 21namespace Vulkan {
22 22
23using Maxwell = Tegra::Engines::Maxwell3D::Regs; 23using Maxwell = Tegra::Engines::Maxwell3D::Regs;
24using TexelBufferEntry = VideoCommon::Shader::Sampler; 24using UniformTexelEntry = VideoCommon::Shader::Sampler;
25using SamplerEntry = VideoCommon::Shader::Sampler; 25using SamplerEntry = VideoCommon::Shader::Sampler;
26using StorageTexelEntry = VideoCommon::Shader::Image;
26using ImageEntry = VideoCommon::Shader::Image; 27using ImageEntry = VideoCommon::Shader::Image;
27 28
28constexpr u32 DESCRIPTOR_SET = 0; 29constexpr u32 DESCRIPTOR_SET = 0;
@@ -66,13 +67,15 @@ private:
66struct ShaderEntries { 67struct ShaderEntries {
67 u32 NumBindings() const { 68 u32 NumBindings() const {
68 return static_cast<u32>(const_buffers.size() + global_buffers.size() + 69 return static_cast<u32>(const_buffers.size() + global_buffers.size() +
69 texel_buffers.size() + samplers.size() + images.size()); 70 uniform_texels.size() + samplers.size() + storage_texels.size() +
71 images.size());
70 } 72 }
71 73
72 std::vector<ConstBufferEntry> const_buffers; 74 std::vector<ConstBufferEntry> const_buffers;
73 std::vector<GlobalBufferEntry> global_buffers; 75 std::vector<GlobalBufferEntry> global_buffers;
74 std::vector<TexelBufferEntry> texel_buffers; 76 std::vector<UniformTexelEntry> uniform_texels;
75 std::vector<SamplerEntry> samplers; 77 std::vector<SamplerEntry> samplers;
78 std::vector<StorageTexelEntry> storage_texels;
76 std::vector<ImageEntry> images; 79 std::vector<ImageEntry> images;
77 std::set<u32> attributes; 80 std::set<u32> attributes;
78 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 81 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
@@ -88,7 +91,8 @@ struct Specialization final {
88 u32 shared_memory_size{}; 91 u32 shared_memory_size{};
89 92
90 // Graphics specific 93 // Graphics specific
91 std::optional<float> point_size{}; 94 std::optional<float> point_size;
95 std::bitset<Maxwell::NumVertexAttributes> enabled_attributes;
92 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; 96 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
93 bool ndc_minus_one_to_one{}; 97 bool ndc_minus_one_to_one{};
94}; 98};
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index dfddf7ad6..689f0d276 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -35,10 +35,14 @@ public:
35 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. 35 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
36 void Unmap(u64 size); 36 void Unmap(u64 size);
37 37
38 VkBuffer GetHandle() const { 38 VkBuffer Handle() const noexcept {
39 return *buffer; 39 return *buffer;
40 } 40 }
41 41
42 u64 Address() const noexcept {
43 return 0;
44 }
45
42private: 46private:
43 struct Watch final { 47 struct Watch final {
44 VKFenceWatch fence; 48 VKFenceWatch fence;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 2f1d5021d..430031665 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -100,8 +100,8 @@ vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params,
100 ci.pNext = nullptr; 100 ci.pNext = nullptr;
101 ci.flags = 0; 101 ci.flags = 0;
102 ci.size = static_cast<VkDeviceSize>(host_memory_size); 102 ci.size = static_cast<VkDeviceSize>(host_memory_size);
103 ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | 103 ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
104 VK_BUFFER_USAGE_TRANSFER_DST_BIT; 104 VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
105 ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; 105 ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
106 ci.queueFamilyIndexCount = 0; 106 ci.queueFamilyIndexCount = 0;
107 ci.pQueueFamilyIndices = nullptr; 107 ci.pQueueFamilyIndices = nullptr;
@@ -167,6 +167,7 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
167 ci.extent = {params.width, params.height, 1}; 167 ci.extent = {params.width, params.height, 1};
168 break; 168 break;
169 case SurfaceTarget::Texture3D: 169 case SurfaceTarget::Texture3D:
170 ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
170 ci.extent = {params.width, params.height, params.depth}; 171 ci.extent = {params.width, params.height, params.depth};
171 break; 172 break;
172 case SurfaceTarget::TextureBuffer: 173 case SurfaceTarget::TextureBuffer:
@@ -176,6 +177,12 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP
176 return ci; 177 return ci;
177} 178}
178 179
180u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source,
181 Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source) {
182 return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
183 (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
184}
185
179} // Anonymous namespace 186} // Anonymous namespace
180 187
181CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, 188CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
@@ -203,9 +210,11 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
203 } 210 }
204 211
205 // TODO(Rodrigo): Move this to a virtual function. 212 // TODO(Rodrigo): Move this to a virtual function.
206 main_view = CreateViewInner( 213 u32 num_layers = 1;
207 ViewParams(params.target, 0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels), 214 if (params.is_layered || params.target == SurfaceTarget::Texture3D) {
208 true); 215 num_layers = params.depth;
216 }
217 main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels));
209} 218}
210 219
211CachedSurface::~CachedSurface() = default; 220CachedSurface::~CachedSurface() = default;
@@ -253,12 +262,8 @@ void CachedSurface::DecorateSurfaceName() {
253} 262}
254 263
255View CachedSurface::CreateView(const ViewParams& params) { 264View CachedSurface::CreateView(const ViewParams& params) {
256 return CreateViewInner(params, false);
257}
258
259View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) {
260 // TODO(Rodrigo): Add name decorations 265 // TODO(Rodrigo): Add name decorations
261 return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy); 266 return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params);
262} 267}
263 268
264void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { 269void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
@@ -342,18 +347,27 @@ VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
342} 347}
343 348
344CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface, 349CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
345 const ViewParams& params, bool is_proxy) 350 const ViewParams& params)
346 : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()}, 351 : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()},
347 image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()}, 352 image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()},
348 aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface}, 353 aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface},
349 base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level}, 354 base_level{params.base_level}, num_levels{params.num_levels},
350 num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target) 355 image_view_type{image ? GetImageViewType(params.target) : VK_IMAGE_VIEW_TYPE_1D} {
351 : VK_IMAGE_VIEW_TYPE_1D} {} 356 if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
357 base_layer = 0;
358 num_layers = 1;
359 base_slice = params.base_layer;
360 num_slices = params.num_layers;
361 } else {
362 base_layer = params.base_layer;
363 num_layers = params.num_layers;
364 }
365}
352 366
353CachedSurfaceView::~CachedSurfaceView() = default; 367CachedSurfaceView::~CachedSurfaceView() = default;
354 368
355VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source, 369VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source,
356 SwizzleSource z_source, SwizzleSource w_source) { 370 SwizzleSource z_source, SwizzleSource w_source) {
357 const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); 371 const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
358 if (last_image_view && last_swizzle == new_swizzle) { 372 if (last_image_view && last_swizzle == new_swizzle) {
359 return last_image_view; 373 return last_image_view;
@@ -399,6 +413,11 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
399 }); 413 });
400 } 414 }
401 415
416 if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
417 ASSERT(base_slice == 0);
418 ASSERT(num_slices == params.depth);
419 }
420
402 VkImageViewCreateInfo ci; 421 VkImageViewCreateInfo ci;
403 ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; 422 ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
404 ci.pNext = nullptr; 423 ci.pNext = nullptr;
@@ -417,6 +436,35 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
417 return last_image_view = *image_view; 436 return last_image_view = *image_view;
418} 437}
419 438
439VkImageView CachedSurfaceView::GetAttachment() {
440 if (render_target) {
441 return *render_target;
442 }
443
444 VkImageViewCreateInfo ci;
445 ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
446 ci.pNext = nullptr;
447 ci.flags = 0;
448 ci.image = surface.GetImageHandle();
449 ci.format = surface.GetImage().GetFormat();
450 ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
451 VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
452 ci.subresourceRange.aspectMask = aspect_mask;
453 ci.subresourceRange.baseMipLevel = base_level;
454 ci.subresourceRange.levelCount = num_levels;
455 if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) {
456 ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D;
457 ci.subresourceRange.baseArrayLayer = base_slice;
458 ci.subresourceRange.layerCount = num_slices;
459 } else {
460 ci.viewType = image_view_type;
461 ci.subresourceRange.baseArrayLayer = base_layer;
462 ci.subresourceRange.layerCount = num_layers;
463 }
464 render_target = device.GetLogical().CreateImageView(ci);
465 return *render_target;
466}
467
420VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 468VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
421 const VKDevice& device, VKResourceManager& resource_manager, 469 const VKDevice& device, VKResourceManager& resource_manager,
422 VKMemoryManager& memory_manager, VKScheduler& scheduler, 470 VKMemoryManager& memory_manager, VKScheduler& scheduler,
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index f211ccb1e..807e26c8a 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -91,7 +91,6 @@ protected:
91 void DecorateSurfaceName(); 91 void DecorateSurfaceName();
92 92
93 View CreateView(const ViewParams& params) override; 93 View CreateView(const ViewParams& params) override;
94 View CreateViewInner(const ViewParams& params, bool is_proxy);
95 94
96private: 95private:
97 void UploadBuffer(const std::vector<u8>& staging_buffer); 96 void UploadBuffer(const std::vector<u8>& staging_buffer);
@@ -120,23 +119,20 @@ private:
120class CachedSurfaceView final : public VideoCommon::ViewBase { 119class CachedSurfaceView final : public VideoCommon::ViewBase {
121public: 120public:
122 explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface, 121 explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface,
123 const ViewParams& params, bool is_proxy); 122 const ViewParams& params);
124 ~CachedSurfaceView(); 123 ~CachedSurfaceView();
125 124
126 VkImageView GetHandle(Tegra::Texture::SwizzleSource x_source, 125 VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source,
127 Tegra::Texture::SwizzleSource y_source, 126 Tegra::Texture::SwizzleSource y_source,
128 Tegra::Texture::SwizzleSource z_source, 127 Tegra::Texture::SwizzleSource z_source,
129 Tegra::Texture::SwizzleSource w_source); 128 Tegra::Texture::SwizzleSource w_source);
129
130 VkImageView GetAttachment();
130 131
131 bool IsSameSurface(const CachedSurfaceView& rhs) const { 132 bool IsSameSurface(const CachedSurfaceView& rhs) const {
132 return &surface == &rhs.surface; 133 return &surface == &rhs.surface;
133 } 134 }
134 135
135 VkImageView GetHandle() {
136 return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G,
137 Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A);
138 }
139
140 u32 GetWidth() const { 136 u32 GetWidth() const {
141 return params.GetMipWidth(base_level); 137 return params.GetMipWidth(base_level);
142 } 138 }
@@ -180,14 +176,6 @@ public:
180 } 176 }
181 177
182private: 178private:
183 static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
184 Tegra::Texture::SwizzleSource y_source,
185 Tegra::Texture::SwizzleSource z_source,
186 Tegra::Texture::SwizzleSource w_source) {
187 return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
188 (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
189 }
190
191 // Store a copy of these values to avoid double dereference when reading them 179 // Store a copy of these values to avoid double dereference when reading them
192 const SurfaceParams params; 180 const SurfaceParams params;
193 const VkImage image; 181 const VkImage image;
@@ -196,15 +184,18 @@ private:
196 184
197 const VKDevice& device; 185 const VKDevice& device;
198 CachedSurface& surface; 186 CachedSurface& surface;
199 const u32 base_layer;
200 const u32 num_layers;
201 const u32 base_level; 187 const u32 base_level;
202 const u32 num_levels; 188 const u32 num_levels;
203 const VkImageViewType image_view_type; 189 const VkImageViewType image_view_type;
190 u32 base_layer = 0;
191 u32 num_layers = 0;
192 u32 base_slice = 0;
193 u32 num_slices = 0;
204 194
205 VkImageView last_image_view = nullptr; 195 VkImageView last_image_view = nullptr;
206 u32 last_swizzle = 0; 196 u32 last_swizzle = 0;
207 197
198 vk::ImageView render_target;
208 std::unordered_map<u32, vk::ImageView> view_cache; 199 std::unordered_map<u32, vk::ImageView> view_cache;
209}; 200};
210 201
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index 681ecde98..351c048d2 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -24,35 +24,25 @@ void VKUpdateDescriptorQueue::TickFrame() {
24} 24}
25 25
26void VKUpdateDescriptorQueue::Acquire() { 26void VKUpdateDescriptorQueue::Acquire() {
27 entries.clear(); 27 // Minimum number of entries required.
28} 28 // This is the maximum number of entries a single draw call migth use.
29 static constexpr std::size_t MIN_ENTRIES = 0x400;
29 30
30void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, 31 if (payload.size() + MIN_ENTRIES >= payload.max_size()) {
31 VkDescriptorSet set) {
32 if (payload.size() + entries.size() >= payload.max_size()) {
33 LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); 32 LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
34 scheduler.WaitWorker(); 33 scheduler.WaitWorker();
35 payload.clear(); 34 payload.clear();
36 } 35 }
36 upload_start = &*payload.end();
37}
37 38
38 // TODO(Rodrigo): Rework to write the payload directly 39void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
39 const auto payload_start = payload.data() + payload.size(); 40 VkDescriptorSet set) {
40 for (const auto& entry : entries) { 41 const void* const data = upload_start;
41 if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) { 42 const vk::Device* const logical = &device.GetLogical();
42 payload.push_back(*image); 43 scheduler.Record([data, logical, set, update_template](vk::CommandBuffer) {
43 } else if (const auto buffer = std::get_if<VkDescriptorBufferInfo>(&entry)) { 44 logical->UpdateDescriptorSet(set, update_template, data);
44 payload.push_back(*buffer); 45 });
45 } else if (const auto texel = std::get_if<VkBufferView>(&entry)) {
46 payload.push_back(*texel);
47 } else {
48 UNREACHABLE();
49 }
50 }
51
52 scheduler.Record(
53 [payload_start, set, update_template, logical = &device.GetLogical()](vk::CommandBuffer) {
54 logical->UpdateDescriptorSet(set, update_template, payload_start);
55 });
56} 46}
57 47
58} // namespace Vulkan 48} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index cc7e3dff4..945320c72 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -15,17 +15,13 @@ namespace Vulkan {
15class VKDevice; 15class VKDevice;
16class VKScheduler; 16class VKScheduler;
17 17
18class DescriptorUpdateEntry { 18struct DescriptorUpdateEntry {
19public: 19 DescriptorUpdateEntry(VkDescriptorImageInfo image_) : image{image_} {}
20 explicit DescriptorUpdateEntry() {}
21
22 DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {}
23 20
24 DescriptorUpdateEntry(VkDescriptorBufferInfo buffer) : buffer{buffer} {} 21 DescriptorUpdateEntry(VkDescriptorBufferInfo buffer_) : buffer{buffer_} {}
25 22
26 DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {} 23 DescriptorUpdateEntry(VkBufferView texel_buffer_) : texel_buffer{texel_buffer_} {}
27 24
28private:
29 union { 25 union {
30 VkDescriptorImageInfo image; 26 VkDescriptorImageInfo image;
31 VkDescriptorBufferInfo buffer; 27 VkDescriptorBufferInfo buffer;
@@ -45,32 +41,34 @@ public:
45 void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); 41 void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
46 42
47 void AddSampledImage(VkSampler sampler, VkImageView image_view) { 43 void AddSampledImage(VkSampler sampler, VkImageView image_view) {
48 entries.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); 44 payload.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}});
49 } 45 }
50 46
51 void AddImage(VkImageView image_view) { 47 void AddImage(VkImageView image_view) {
52 entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); 48 payload.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
53 } 49 }
54 50
55 void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) { 51 void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) {
56 entries.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); 52 payload.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});
57 } 53 }
58 54
59 void AddTexelBuffer(VkBufferView texel_buffer) { 55 void AddTexelBuffer(VkBufferView texel_buffer) {
60 entries.emplace_back(texel_buffer); 56 payload.emplace_back(texel_buffer);
61 } 57 }
62 58
63 VkImageLayout* GetLastImageLayout() { 59 VkImageLayout* LastImageLayout() {
64 return &std::get<VkDescriptorImageInfo>(entries.back()).imageLayout; 60 return &payload.back().image.imageLayout;
65 } 61 }
66 62
67private: 63 const VkImageLayout* LastImageLayout() const {
68 using Variant = std::variant<VkDescriptorImageInfo, VkDescriptorBufferInfo, VkBufferView>; 64 return &payload.back().image.imageLayout;
65 }
69 66
67private:
70 const VKDevice& device; 68 const VKDevice& device;
71 VKScheduler& scheduler; 69 VKScheduler& scheduler;
72 70
73 boost::container::static_vector<Variant, 0x400> entries; 71 const DescriptorUpdateEntry* upload_start = nullptr;
74 boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload; 72 boost::container::static_vector<DescriptorUpdateEntry, 0x10000> payload;
75}; 73};
76 74
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 2ce9b0626..42eff85d3 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -725,8 +725,7 @@ bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR s
725 return supported == VK_TRUE; 725 return supported == VK_TRUE;
726} 726}
727 727
728VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const 728VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const {
729 noexcept {
730 VkSurfaceCapabilitiesKHR capabilities; 729 VkSurfaceCapabilitiesKHR capabilities;
731 Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities)); 730 Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities));
732 return capabilities; 731 return capabilities;
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index 98937a77a..da42ca88e 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -779,7 +779,7 @@ public:
779 779
780 bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const; 780 bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const;
781 781
782 VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const noexcept; 782 VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const;
783 783
784 std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) const; 784 std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) const;
785 785
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 848e46874..b2e88fa20 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -13,55 +13,101 @@
13 13
14namespace VideoCommon::Shader { 14namespace VideoCommon::Shader {
15 15
16using std::move;
16using Tegra::Shader::Instruction; 17using Tegra::Shader::Instruction;
17using Tegra::Shader::OpCode; 18using Tegra::Shader::OpCode;
19using Tegra::Shader::PredCondition;
18 20
19u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { 21u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
20 const Instruction instr = {program_code[pc]}; 22 const Instruction instr = {program_code[pc]};
21 const auto opcode = OpCode::Decode(instr); 23 const auto opcode = OpCode::Decode(instr);
22 24
23 if (instr.hset2.ftz == 0) { 25 PredCondition cond;
24 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); 26 bool bf;
27 bool ftz;
28 bool neg_a;
29 bool abs_a;
30 bool neg_b;
31 bool abs_b;
32 switch (opcode->get().GetId()) {
33 case OpCode::Id::HSET2_C:
34 case OpCode::Id::HSET2_IMM:
35 cond = instr.hsetp2.cbuf_and_imm.cond;
36 bf = instr.Bit(53);
37 ftz = instr.Bit(54);
38 neg_a = instr.Bit(43);
39 abs_a = instr.Bit(44);
40 neg_b = instr.Bit(56);
41 abs_b = instr.Bit(54);
42 break;
43 case OpCode::Id::HSET2_R:
44 cond = instr.hsetp2.reg.cond;
45 bf = instr.Bit(49);
46 ftz = instr.Bit(50);
47 neg_a = instr.Bit(43);
48 abs_a = instr.Bit(44);
49 neg_b = instr.Bit(31);
50 abs_b = instr.Bit(30);
51 break;
52 default:
53 UNREACHABLE();
25 } 54 }
26 55
27 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); 56 Node op_b = [this, instr, opcode] {
28 op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
29
30 Node op_b = [&]() {
31 switch (opcode->get().GetId()) { 57 switch (opcode->get().GetId()) {
58 case OpCode::Id::HSET2_C:
59 // Inform as unimplemented as this is not tested.
60 UNIMPLEMENTED_MSG("HSET2_C is not implemented");
61 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
32 case OpCode::Id::HSET2_R: 62 case OpCode::Id::HSET2_R:
33 return GetRegister(instr.gpr20); 63 return GetRegister(instr.gpr20);
64 case OpCode::Id::HSET2_IMM:
65 return UnpackHalfImmediate(instr, true);
34 default: 66 default:
35 UNREACHABLE(); 67 UNREACHABLE();
36 return Immediate(0); 68 return Node{};
37 } 69 }
38 }(); 70 }();
39 op_b = UnpackHalfFloat(op_b, instr.hset2.type_b);
40 op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b);
41 71
42 const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); 72 if (!ftz) {
73 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
74 }
75
76 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
77 op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a);
78
79 switch (opcode->get().GetId()) {
80 case OpCode::Id::HSET2_R:
81 op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b);
82 [[fallthrough]];
83 case OpCode::Id::HSET2_C:
84 op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b);
85 break;
86 default:
87 break;
88 }
43 89
44 const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b); 90 Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
91
92 Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b);
45 93
46 const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); 94 const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
47 95
48 // HSET2 operates on each half float in the pack. 96 // HSET2 operates on each half float in the pack.
49 std::array<Node, 2> values; 97 std::array<Node, 2> values;
50 for (u32 i = 0; i < 2; ++i) { 98 for (u32 i = 0; i < 2; ++i) {
51 const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff; 99 const u32 raw_value = bf ? 0x3c00 : 0xffff;
52 const Node true_value = Immediate(raw_value << (i * 16)); 100 Node true_value = Immediate(raw_value << (i * 16));
53 const Node false_value = Immediate(0); 101 Node false_value = Immediate(0);
54
55 const Node comparison =
56 Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
57 const Node predicate = Operation(combiner, comparison, second_pred);
58 102
103 Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
104 Node predicate = Operation(combiner, comparison, second_pred);
59 values[i] = 105 values[i] =
60 Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value); 106 Operation(OperationCode::Select, predicate, move(true_value), move(false_value));
61 } 107 }
62 108
63 const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]); 109 Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]);
64 SetRegister(bb, instr.gpr0, value); 110 SetRegister(bb, instr.gpr0, move(value));
65 111
66 return pc; 112 return pc;
67} 113}
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 60b6ad72a..07778dc3e 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -97,6 +97,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
97 break; 97 break;
98 case TextureFormat::B5G6R5: 98 case TextureFormat::B5G6R5:
99 case TextureFormat::B6G5R5: 99 case TextureFormat::B6G5R5:
100 case TextureFormat::BF10GF11RF11:
100 if (component == 0) { 101 if (component == 0) {
101 return descriptor.b_type; 102 return descriptor.b_type;
102 } 103 }
@@ -119,7 +120,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
119 } 120 }
120 break; 121 break;
121 } 122 }
122 UNIMPLEMENTED_MSG("texture format not implement={}", format); 123 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
123 return ComponentType::FLOAT; 124 return ComponentType::FLOAT;
124} 125}
125 126
@@ -191,6 +192,14 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
191 return 6; 192 return 6;
192 } 193 }
193 return 0; 194 return 0;
195 case TextureFormat::BF10GF11RF11:
196 if (component == 1 || component == 2) {
197 return 11;
198 }
199 if (component == 0) {
200 return 10;
201 }
202 return 0;
194 case TextureFormat::G8R24: 203 case TextureFormat::G8R24:
195 if (component == 0) { 204 if (component == 0) {
196 return 8; 205 return 8;
@@ -211,10 +220,9 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
211 return (component == 0 || component == 1) ? 8 : 0; 220 return (component == 0 || component == 1) ? 8 : 0;
212 case TextureFormat::G4R4: 221 case TextureFormat::G4R4:
213 return (component == 0 || component == 1) ? 4 : 0; 222 return (component == 0 || component == 1) ? 4 : 0;
214 default:
215 UNIMPLEMENTED_MSG("texture format not implement={}", format);
216 return 0;
217 } 223 }
224 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
225 return 0;
218} 226}
219 227
220std::size_t GetImageComponentMask(TextureFormat format) { 228std::size_t GetImageComponentMask(TextureFormat format) {
@@ -235,6 +243,7 @@ std::size_t GetImageComponentMask(TextureFormat format) {
235 case TextureFormat::R32_B24G8: 243 case TextureFormat::R32_B24G8:
236 case TextureFormat::B5G6R5: 244 case TextureFormat::B5G6R5:
237 case TextureFormat::B6G5R5: 245 case TextureFormat::B6G5R5:
246 case TextureFormat::BF10GF11RF11:
238 return std::size_t{R | G | B}; 247 return std::size_t{R | G | B};
239 case TextureFormat::R32_G32: 248 case TextureFormat::R32_G32:
240 case TextureFormat::R16_G16: 249 case TextureFormat::R16_G16:
@@ -248,10 +257,9 @@ std::size_t GetImageComponentMask(TextureFormat format) {
248 case TextureFormat::R8: 257 case TextureFormat::R8:
249 case TextureFormat::R1: 258 case TextureFormat::R1:
250 return std::size_t{R}; 259 return std::size_t{R};
251 default:
252 UNIMPLEMENTED_MSG("texture format not implement={}", format);
253 return std::size_t{R | G | B | A};
254 } 260 }
261 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
262 return std::size_t{R | G | B | A};
255} 263}
256 264
257std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { 265std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
@@ -299,7 +307,7 @@ std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type,
299 return {std::move(original_value), true}; 307 return {std::move(original_value), true};
300 } 308 }
301 default: 309 default:
302 UNIMPLEMENTED_MSG("Unimplement component type={}", component_type); 310 UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type);
303 return {std::move(original_value), true}; 311 return {std::move(original_value), true};
304 } 312 }
305} 313}
@@ -459,7 +467,7 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
459 default: 467 default:
460 break; 468 break;
461 } 469 }
462 UNIMPLEMENTED_MSG("Unimplemented operation={} type={}", 470 UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}",
463 static_cast<u64>(instr.suatom_d.operation.Value()), 471 static_cast<u64>(instr.suatom_d.operation.Value()),
464 static_cast<u64>(instr.suatom_d.operation_type.Value())); 472 static_cast<u64>(instr.suatom_d.operation_type.Value()));
465 return OperationCode::AtomicImageAdd; 473 return OperationCode::AtomicImageAdd;
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d00e10913..c0a8f233f 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -83,7 +83,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
83 return Operation(OperationCode::YNegate); 83 return Operation(OperationCode::YNegate);
84 case SystemVariable::InvocationInfo: 84 case SystemVariable::InvocationInfo:
85 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); 85 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
86 return Immediate(0U); 86 return Immediate(0x00ff'0000U);
87 case SystemVariable::WscaleFactorXY: 87 case SystemVariable::WscaleFactorXY:
88 UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); 88 UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
89 return Immediate(0U); 89 return Immediate(0U);
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 8f0bb996e..29ebf65ba 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -357,13 +357,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
357 return pc; 357 return pc;
358} 358}
359 359
360ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset, 360ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
361 std::optional<u32> buffer) { 361 SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
362 if (info.IsComplete()) { 362 if (info.IsComplete()) {
363 return info; 363 return info;
364 } 364 }
365 const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
366 : registry.ObtainBoundSampler(offset);
367 if (!sampler) { 365 if (!sampler) {
368 LOG_WARNING(HW_GPU, "Unknown sampler info"); 366 LOG_WARNING(HW_GPU, "Unknown sampler info");
369 info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); 367 info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
@@ -381,8 +379,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
381 379
382std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, 380std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
383 SamplerInfo sampler_info) { 381 SamplerInfo sampler_info) {
384 const auto offset = static_cast<u32>(sampler.index.Value()); 382 const u32 offset = static_cast<u32>(sampler.index.Value());
385 const auto info = GetSamplerInfo(sampler_info, offset); 383 const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
386 384
387 // If this sampler has already been used, return the existing mapping. 385 // If this sampler has already been used, return the existing mapping.
388 const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), 386 const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
@@ -404,20 +402,19 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
404 const Node sampler_register = GetRegister(reg); 402 const Node sampler_register = GetRegister(reg);
405 const auto [base_node, tracked_sampler_info] = 403 const auto [base_node, tracked_sampler_info] =
406 TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); 404 TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
407 ASSERT(base_node != nullptr); 405 if (!base_node) {
408 if (base_node == nullptr) { 406 UNREACHABLE();
409 return std::nullopt; 407 return std::nullopt;
410 } 408 }
411 409
412 if (const auto bindless_sampler_info = 410 if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
413 std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { 411 const u32 buffer = sampler_info->index;
414 const u32 buffer = bindless_sampler_info->GetIndex(); 412 const u32 offset = sampler_info->offset;
415 const u32 offset = bindless_sampler_info->GetOffset(); 413 info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
416 info = GetSamplerInfo(info, offset, buffer);
417 414
418 // If this sampler has already been used, return the existing mapping. 415 // If this sampler has already been used, return the existing mapping.
419 const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), 416 const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
420 [buffer = buffer, offset = offset](const Sampler& entry) { 417 [buffer, offset](const Sampler& entry) {
421 return entry.buffer == buffer && entry.offset == offset; 418 return entry.buffer == buffer && entry.offset == offset;
422 }); 419 });
423 if (it != used_samplers.end()) { 420 if (it != used_samplers.end()) {
@@ -431,10 +428,32 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
431 return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, 428 return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
432 *info.is_shadow, *info.is_buffer, false); 429 *info.is_shadow, *info.is_buffer, false);
433 } 430 }
434 if (const auto array_sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { 431 if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
435 const u32 base_offset = array_sampler_info->GetBaseOffset() / 4; 432 const std::pair indices = sampler_info->indices;
436 index_var = GetCustomVariable(array_sampler_info->GetIndexVar()); 433 const std::pair offsets = sampler_info->offsets;
437 info = GetSamplerInfo(info, base_offset); 434 info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
435
436 // Try to use an already created sampler if it exists
437 const auto it = std::find_if(
438 used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) {
439 return offsets == std::pair{entry.offset, entry.secondary_offset} &&
440 indices == std::pair{entry.buffer, entry.secondary_buffer};
441 });
442 if (it != used_samplers.end()) {
443 ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
444 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
445 return *it;
446 }
447
448 // Otherwise create a new mapping for this sampler
449 const u32 next_index = static_cast<u32>(used_samplers.size());
450 return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
451 *info.is_shadow, *info.is_buffer);
452 }
453 if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
454 const u32 base_offset = sampler_info->base_offset / 4;
455 index_var = GetCustomVariable(sampler_info->bindless_var);
456 info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
438 457
439 // If this sampler has already been used, return the existing mapping. 458 // If this sampler has already been used, return the existing mapping.
440 const auto it = std::find_if( 459 const auto it = std::find_if(
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp
index 074f21691..5071c83ca 100644
--- a/src/video_core/shader/memory_util.cpp
+++ b/src/video_core/shader/memory_util.cpp
@@ -66,12 +66,12 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_add
66 66
67u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, 67u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
68 const ProgramCode& code_b) { 68 const ProgramCode& code_b) {
69 u64 unique_identifier = boost::hash_value(code); 69 size_t unique_identifier = boost::hash_value(code);
70 if (is_a) { 70 if (is_a) {
71 // VertexA programs include two programs 71 // VertexA programs include two programs
72 boost::hash_combine(unique_identifier, boost::hash_value(code_b)); 72 boost::hash_combine(unique_identifier, boost::hash_value(code_b));
73 } 73 }
74 return unique_identifier; 74 return static_cast<u64>(unique_identifier);
75} 75}
76 76
77} // namespace VideoCommon::Shader 77} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index c5e5165ff..8f230d57a 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -275,10 +275,11 @@ using Node = std::shared_ptr<NodeData>;
275using Node4 = std::array<Node, 4>; 275using Node4 = std::array<Node, 4>;
276using NodeBlock = std::vector<Node>; 276using NodeBlock = std::vector<Node>;
277 277
278class BindlessSamplerNode; 278struct ArraySamplerNode;
279class ArraySamplerNode; 279struct BindlessSamplerNode;
280struct SeparateSamplerNode;
280 281
281using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>; 282using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
282using TrackSampler = std::shared_ptr<TrackSamplerData>; 283using TrackSampler = std::shared_ptr<TrackSamplerData>;
283 284
284struct Sampler { 285struct Sampler {
@@ -288,63 +289,51 @@ struct Sampler {
288 : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, 289 : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
289 is_buffer{is_buffer}, is_indexed{is_indexed} {} 290 is_buffer{is_buffer}, is_indexed{is_indexed} {}
290 291
292 /// Separate sampler constructor
293 constexpr explicit Sampler(u32 index, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
294 Tegra::Shader::TextureType type, bool is_array, bool is_shadow,
295 bool is_buffer)
296 : index{index}, offset{offsets.first}, secondary_offset{offsets.second},
297 buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array},
298 is_shadow{is_shadow}, is_buffer{is_buffer}, is_separated{true} {}
299
291 /// Bindless samplers constructor 300 /// Bindless samplers constructor
292 constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, 301 constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
293 bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) 302 bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
294 : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, 303 : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
295 is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {} 304 is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}
296 305
297 u32 index = 0; ///< Emulated index given for the this sampler. 306 u32 index = 0; ///< Emulated index given for the this sampler.
298 u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. 307 u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
299 u32 buffer = 0; ///< Buffer where the bindless sampler is being read (unused on bound samplers). 308 u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
300 u32 size = 1; ///< Size of the sampler. 309 u32 buffer = 0; ///< Buffer where the bindless sampler is read.
310 u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
311 u32 size = 1; ///< Size of the sampler.
301 312
302 Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) 313 Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
303 bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. 314 bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
304 bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. 315 bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
305 bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. 316 bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
306 bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. 317 bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
307 bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. 318 bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
319 bool is_separated = false; ///< Whether the image and sampler is separated or not.
308}; 320};
309 321
310/// Represents a tracked bindless sampler into a direct const buffer 322/// Represents a tracked bindless sampler into a direct const buffer
311class ArraySamplerNode final { 323struct ArraySamplerNode {
312public:
313 explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
314 : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
315
316 constexpr u32 GetIndex() const {
317 return index;
318 }
319
320 constexpr u32 GetBaseOffset() const {
321 return base_offset;
322 }
323
324 constexpr u32 GetIndexVar() const {
325 return bindless_var;
326 }
327
328private:
329 u32 index; 324 u32 index;
330 u32 base_offset; 325 u32 base_offset;
331 u32 bindless_var; 326 u32 bindless_var;
332}; 327};
333 328
334/// Represents a tracked bindless sampler into a direct const buffer 329/// Represents a tracked separate sampler image pair that was folded statically
335class BindlessSamplerNode final { 330struct SeparateSamplerNode {
336public: 331 std::pair<u32, u32> indices;
337 explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {} 332 std::pair<u32, u32> offsets;
338 333};
339 constexpr u32 GetIndex() const {
340 return index;
341 }
342
343 constexpr u32 GetOffset() const {
344 return offset;
345 }
346 334
347private: 335/// Represents a tracked bindless sampler into a direct const buffer
336struct BindlessSamplerNode {
348 u32 index; 337 u32 index;
349 u32 offset; 338 u32 offset;
350}; 339};
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
index 11231bbea..1e0886185 100644
--- a/src/video_core/shader/node_helper.h
+++ b/src/video_core/shader/node_helper.h
@@ -48,7 +48,7 @@ Node MakeNode(Args&&... args) {
48template <typename T, typename... Args> 48template <typename T, typename... Args>
49TrackSampler MakeTrackSampler(Args&&... args) { 49TrackSampler MakeTrackSampler(Args&&... args) {
50 static_assert(std::is_convertible_v<T, TrackSamplerData>); 50 static_assert(std::is_convertible_v<T, TrackSamplerData>);
51 return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...)); 51 return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
52} 52}
53 53
54template <typename... Args> 54template <typename... Args>
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
index af70b3f35..cdf274e54 100644
--- a/src/video_core/shader/registry.cpp
+++ b/src/video_core/shader/registry.cpp
@@ -93,6 +93,26 @@ std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
93 return value; 93 return value;
94} 94}
95 95
96std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
97 std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
98 SeparateSamplerKey key;
99 key.buffers = buffers;
100 key.offsets = offsets;
101 const auto iter = separate_samplers.find(key);
102 if (iter != separate_samplers.end()) {
103 return iter->second;
104 }
105 if (!engine) {
106 return std::nullopt;
107 }
108
109 const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
110 const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
111 const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
112 separate_samplers.emplace(key, value);
113 return value;
114}
115
96std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, 116std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
97 u32 offset) { 117 u32 offset) {
98 const std::pair key = {buffer, offset}; 118 const std::pair key = {buffer, offset};
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
index 0c80d35fd..231206765 100644
--- a/src/video_core/shader/registry.h
+++ b/src/video_core/shader/registry.h
@@ -19,8 +19,39 @@
19 19
20namespace VideoCommon::Shader { 20namespace VideoCommon::Shader {
21 21
22struct SeparateSamplerKey {
23 std::pair<u32, u32> buffers;
24 std::pair<u32, u32> offsets;
25};
26
27} // namespace VideoCommon::Shader
28
29namespace std {
30
31template <>
32struct hash<VideoCommon::Shader::SeparateSamplerKey> {
33 std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
34 return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^
35 key.offsets.second);
36 }
37};
38
39template <>
40struct equal_to<VideoCommon::Shader::SeparateSamplerKey> {
41 bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
42 const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
43 return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
44 }
45};
46
47} // namespace std
48
49namespace VideoCommon::Shader {
50
22using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; 51using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
23using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; 52using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
53using SeparateSamplerMap =
54 std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
24using BindlessSamplerMap = 55using BindlessSamplerMap =
25 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; 56 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
26 57
@@ -73,6 +104,9 @@ public:
73 104
74 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); 105 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
75 106
107 std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
108 std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
109
76 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); 110 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
77 111
78 /// Inserts a key. 112 /// Inserts a key.
@@ -128,6 +162,7 @@ private:
128 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; 162 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
129 KeyMap keys; 163 KeyMap keys;
130 BoundSamplerMap bound_samplers; 164 BoundSamplerMap bound_samplers;
165 SeparateSamplerMap separate_samplers;
131 BindlessSamplerMap bindless_samplers; 166 BindlessSamplerMap bindless_samplers;
132 u32 bound_buffer; 167 u32 bound_buffer;
133 GraphicsInfo graphics_info; 168 GraphicsInfo graphics_info;
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 15ae152f2..3a98b2104 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -330,8 +330,8 @@ private:
330 OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); 330 OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
331 331
332 /// Queries the missing sampler info from the execution context. 332 /// Queries the missing sampler info from the execution context.
333 SamplerInfo GetSamplerInfo(SamplerInfo info, u32 offset, 333 SamplerInfo GetSamplerInfo(SamplerInfo info,
334 std::optional<u32> buffer = std::nullopt); 334 std::optional<Tegra::Engines::SamplerDescriptor> sampler);
335 335
336 /// Accesses a texture sampler. 336 /// Accesses a texture sampler.
337 std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); 337 std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
@@ -409,8 +409,14 @@ private:
409 409
410 std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; 410 std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
411 411
412 std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, 412 std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
413 s64 cursor); 413 s64 cursor);
414
415 std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf,
416 const OperationNode& operation,
417 Node gpr, Node base_offset,
418 Node tracked, const NodeBlock& code,
419 s64 cursor);
414 420
415 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; 421 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
416 422
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index eb97bfd41..d5ed81442 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -14,6 +14,7 @@
14namespace VideoCommon::Shader { 14namespace VideoCommon::Shader {
15 15
16namespace { 16namespace {
17
17std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, 18std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
18 OperationCode operation_code) { 19 OperationCode operation_code) {
19 for (; cursor >= 0; --cursor) { 20 for (; cursor >= 0; --cursor) {
@@ -63,7 +64,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
63 if (const auto operation = std::get_if<OperationNode>(&*node)) { 64 if (const auto operation = std::get_if<OperationNode>(&*node)) {
64 operation->SetAmendIndex(amend_index); 65 operation->SetAmendIndex(amend_index);
65 return true; 66 return true;
66 } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { 67 }
68 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
67 conditional->SetAmendIndex(amend_index); 69 conditional->SetAmendIndex(amend_index);
68 return true; 70 return true;
69 } 71 }
@@ -72,40 +74,27 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
72 74
73} // Anonymous namespace 75} // Anonymous namespace
74 76
75std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, 77std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
76 s64 cursor) { 78 s64 cursor) {
77 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { 79 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
80 const u32 cbuf_index = cbuf->GetIndex();
81
78 // Constant buffer found, test if it's an immediate 82 // Constant buffer found, test if it's an immediate
79 const auto& offset = cbuf->GetOffset(); 83 const auto& offset = cbuf->GetOffset();
80 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { 84 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
81 auto track = 85 auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
82 MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
83 return {tracked, track}; 86 return {tracked, track};
84 } 87 }
85 if (const auto operation = std::get_if<OperationNode>(&*offset)) { 88 if (const auto operation = std::get_if<OperationNode>(&*offset)) {
86 const u32 bound_buffer = registry.GetBoundBuffer(); 89 const u32 bound_buffer = registry.GetBoundBuffer();
87 if (bound_buffer != cbuf->GetIndex()) { 90 if (bound_buffer != cbuf_index) {
88 return {}; 91 return {};
89 } 92 }
90 const auto pair = DecoupleIndirectRead(*operation); 93 if (const std::optional pair = DecoupleIndirectRead(*operation)) {
91 if (!pair) { 94 auto [gpr, base_offset] = *pair;
92 return {}; 95 return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked,
96 code, cursor);
93 } 97 }
94 auto [gpr, base_offset] = *pair;
95 const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
96 const auto& gpu_driver = registry.AccessGuestDriverProfile();
97 const u32 bindless_cv = NewCustomVariable();
98 Node op =
99 Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
100
101 const Node cv_node = GetCustomVariable(bindless_cv);
102 Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
103 const std::size_t amend_index = DeclareAmend(std::move(amend_op));
104 AmendNodeCv(amend_index, code[cursor]);
105 // TODO Implement Bindless Index custom variable
106 auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(),
107 offset_inm->GetValue(), bindless_cv);
108 return {tracked, track};
109 } 98 }
110 return {}; 99 return {};
111 } 100 }
@@ -122,10 +111,23 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
122 return TrackBindlessSampler(source, code, new_cursor); 111 return TrackBindlessSampler(source, code, new_cursor);
123 } 112 }
124 if (const auto operation = std::get_if<OperationNode>(&*tracked)) { 113 if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
125 for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { 114 const OperationNode& op = *operation;
126 if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor); 115
127 std::get<0>(found)) { 116 const OperationCode opcode = operation->GetCode();
128 // Cbuf found in operand. 117 if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
118 ASSERT(op.GetOperandsCount() == 2);
119 auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
120 auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
121 if (node_a && node_b) {
122 auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
123 std::pair{offset_a, offset_b});
124 return {tracked, std::move(track)};
125 }
126 }
127 std::size_t i = op.GetOperandsCount();
128 while (i--) {
129 if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) {
130 // Constant buffer found in operand.
129 return found; 131 return found;
130 } 132 }
131 } 133 }
@@ -139,6 +141,26 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
139 return {}; 141 return {};
140} 142}
141 143
144std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead(
145 const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked,
146 const NodeBlock& code, s64 cursor) {
147 const auto offset_imm = std::get<ImmediateNode>(*base_offset);
148 const auto& gpu_driver = registry.AccessGuestDriverProfile();
149 const u32 bindless_cv = NewCustomVariable();
150 const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize();
151 Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size));
152
153 Node cv_node = GetCustomVariable(bindless_cv);
154 Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op));
155 const std::size_t amend_index = DeclareAmend(std::move(amend_op));
156 AmendNodeCv(amend_index, code[cursor]);
157
158 // TODO: Implement bindless index custom variable
159 auto track =
160 MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv);
161 return {tracked, track};
162}
163
142std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, 164std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
143 s64 cursor) const { 165 s64 cursor) const {
144 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { 166 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
new file mode 100644
index 000000000..2dd270e99
--- /dev/null
+++ b/src/video_core/shader_cache.h
@@ -0,0 +1,228 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <memory>
9#include <mutex>
10#include <unordered_map>
11#include <utility>
12#include <vector>
13
14#include "common/assert.h"
15#include "common/common_types.h"
16#include "video_core/rasterizer_interface.h"
17
18namespace VideoCommon {
19
20template <class T>
21class ShaderCache {
22 static constexpr u64 PAGE_BITS = 14;
23
24 struct Entry {
25 VAddr addr_start;
26 VAddr addr_end;
27 T* data;
28
29 bool is_memory_marked = true;
30
31 constexpr bool Overlaps(VAddr start, VAddr end) const noexcept {
32 return start < addr_end && addr_start < end;
33 }
34 };
35
36public:
37 virtual ~ShaderCache() = default;
38
39 /// @brief Removes shaders inside a given region
40 /// @note Checks for ranges
41 /// @param addr Start address of the invalidation
42 /// @param size Number of bytes of the invalidation
43 void InvalidateRegion(VAddr addr, std::size_t size) {
44 std::scoped_lock lock{invalidation_mutex};
45 InvalidatePagesInRegion(addr, size);
46 RemovePendingShaders();
47 }
48
49 /// @brief Unmarks a memory region as cached and marks it for removal
50 /// @param addr Start address of the CPU write operation
51 /// @param size Number of bytes of the CPU write operation
52 void OnCPUWrite(VAddr addr, std::size_t size) {
53 std::lock_guard lock{invalidation_mutex};
54 InvalidatePagesInRegion(addr, size);
55 }
56
57 /// @brief Flushes delayed removal operations
58 void SyncGuestHost() {
59 std::scoped_lock lock{invalidation_mutex};
60 RemovePendingShaders();
61 }
62
63 /// @brief Tries to obtain a cached shader starting in a given address
64 /// @note Doesn't check for ranges, the given address has to be the start of the shader
65 /// @param addr Start address of the shader, this doesn't cache for region
66 /// @return Pointer to a valid shader, nullptr when nothing is found
67 T* TryGet(VAddr addr) const {
68 std::scoped_lock lock{lookup_mutex};
69
70 const auto it = lookup_cache.find(addr);
71 if (it == lookup_cache.end()) {
72 return nullptr;
73 }
74 return it->second->data;
75 }
76
77protected:
78 explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
79
80 /// @brief Register in the cache a given entry
81 /// @param data Shader to store in the cache
82 /// @param addr Start address of the shader that will be registered
83 /// @param size Size in bytes of the shader
84 void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) {
85 std::scoped_lock lock{invalidation_mutex, lookup_mutex};
86
87 const VAddr addr_end = addr + size;
88 Entry* const entry = NewEntry(addr, addr_end, data.get());
89
90 const u64 page_end = addr_end >> PAGE_BITS;
91 for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
92 invalidation_cache[page].push_back(entry);
93 }
94
95 storage.push_back(std::move(data));
96
97 rasterizer.UpdatePagesCachedCount(addr, size, 1);
98 }
99
100 /// @brief Called when a shader is going to be removed
101 /// @param shader Shader that will be removed
102 /// @pre invalidation_cache is locked
103 /// @pre lookup_mutex is locked
104 virtual void OnShaderRemoval([[maybe_unused]] T* shader) {}
105
106private:
107 /// @brief Invalidate pages in a given region
108 /// @pre invalidation_mutex is locked
109 void InvalidatePagesInRegion(VAddr addr, std::size_t size) {
110 const VAddr addr_end = addr + size;
111 const u64 page_end = addr_end >> PAGE_BITS;
112 for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
113 const auto it = invalidation_cache.find(page);
114 if (it == invalidation_cache.end()) {
115 continue;
116 }
117
118 std::vector<Entry*>& entries = it->second;
119 InvalidatePageEntries(entries, addr, addr_end);
120
121 // If there's nothing else in this page, remove it to avoid overpopulating the hash map.
122 if (entries.empty()) {
123 invalidation_cache.erase(it);
124 }
125 }
126 }
127
128 /// @brief Remove shaders marked for deletion
129 /// @pre invalidation_mutex is locked
130 void RemovePendingShaders() {
131 if (marked_for_removal.empty()) {
132 return;
133 }
134 std::scoped_lock lock{lookup_mutex};
135
136 std::vector<T*> removed_shaders;
137 removed_shaders.reserve(marked_for_removal.size());
138
139 for (Entry* const entry : marked_for_removal) {
140 if (lookup_cache.erase(entry->addr_start) > 0) {
141 removed_shaders.push_back(entry->data);
142 }
143 }
144 marked_for_removal.clear();
145
146 if (!removed_shaders.empty()) {
147 RemoveShadersFromStorage(std::move(removed_shaders));
148 }
149 }
150
151 /// @brief Invalidates entries in a given range for the passed page
152 /// @param entries Vector of entries in the page, it will be modified on overlaps
153 /// @param addr Start address of the invalidation
154 /// @param addr_end Non-inclusive end address of the invalidation
155 /// @pre invalidation_mutex is locked
156 void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
157 auto it = entries.begin();
158 while (it != entries.end()) {
159 Entry* const entry = *it;
160 if (!entry->Overlaps(addr, addr_end)) {
161 ++it;
162 continue;
163 }
164 UnmarkMemory(entry);
165 marked_for_removal.push_back(entry);
166
167 it = entries.erase(it);
168 }
169 }
170
171 /// @brief Unmarks an entry from the rasterizer cache
172 /// @param entry Entry to unmark from memory
173 void UnmarkMemory(Entry* entry) {
174 if (!entry->is_memory_marked) {
175 return;
176 }
177 entry->is_memory_marked = false;
178
179 const VAddr addr = entry->addr_start;
180 const std::size_t size = entry->addr_end - addr;
181 rasterizer.UpdatePagesCachedCount(addr, size, -1);
182 }
183
184 /// @brief Removes a vector of shaders from a list
185 /// @param removed_shaders Shaders to be removed from the storage, it can contain duplicates
186 /// @pre invalidation_mutex is locked
187 /// @pre lookup_mutex is locked
188 void RemoveShadersFromStorage(std::vector<T*> removed_shaders) {
189 // Remove duplicates
190 std::sort(removed_shaders.begin(), removed_shaders.end());
191 removed_shaders.erase(std::unique(removed_shaders.begin(), removed_shaders.end()),
192 removed_shaders.end());
193
194 // Now that there are no duplicates, we can notify removals
195 for (T* const shader : removed_shaders) {
196 OnShaderRemoval(shader);
197 }
198
199 // Remove them from the cache
200 const auto is_removed = [&removed_shaders](std::unique_ptr<T>& shader) {
201 return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) !=
202 removed_shaders.end();
203 };
204 storage.erase(std::remove_if(storage.begin(), storage.end(), is_removed), storage.end());
205 }
206
207 /// @brief Creates a new entry in the lookup cache and returns its pointer
208 /// @pre lookup_mutex is locked
209 Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) {
210 auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
211 Entry* const entry_pointer = entry.get();
212
213 lookup_cache.emplace(addr, std::move(entry));
214 return entry_pointer;
215 }
216
217 VideoCore::RasterizerInterface& rasterizer;
218
219 mutable std::mutex lookup_mutex;
220 std::mutex invalidation_mutex;
221
222 std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
223 std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
224 std::vector<std::unique_ptr<T>> storage;
225 std::vector<Entry*> marked_for_removal;
226};
227
228} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 7032e0059..f476f03b0 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -41,7 +41,7 @@ struct Table {
41 ComponentType alpha_component; 41 ComponentType alpha_component;
42 bool is_srgb; 42 bool is_srgb;
43}; 43};
44constexpr std::array<Table, 77> DefinitionTable = {{ 44constexpr std::array<Table, 78> DefinitionTable = {{
45 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, 45 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
46 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, 46 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
47 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, 47 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
@@ -98,6 +98,7 @@ constexpr std::array<Table, 77> DefinitionTable = {{
98 {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, 98 {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F},
99 {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, 99 {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16},
100 {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, 100 {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
101 {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
101 {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, 102 {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8},
102 103
103 {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, 104 {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1},
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 715f39d0d..0caf3b4f0 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -120,6 +120,9 @@ std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
120 } 120 }
121 const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)}; 121 const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
122 const auto layer{static_cast<u32>(relative_address / layer_size)}; 122 const auto layer{static_cast<u32>(relative_address / layer_size)};
123 if (layer >= params.depth) {
124 return {};
125 }
123 const GPUVAddr mipmap_address = relative_address - layer_size * layer; 126 const GPUVAddr mipmap_address = relative_address - layer_size * layer;
124 const auto mipmap_it = 127 const auto mipmap_it =
125 Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); 128 Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
@@ -248,12 +251,11 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
248 251
249 // Use an extra temporal buffer 252 // Use an extra temporal buffer
250 auto& tmp_buffer = staging_cache.GetBuffer(1); 253 auto& tmp_buffer = staging_cache.GetBuffer(1);
251 // Special case for 3D Texture Segments
252 const bool must_read_current_data =
253 params.block_depth > 0 && params.target == VideoCore::Surface::SurfaceTarget::Texture2D;
254 tmp_buffer.resize(guest_memory_size); 254 tmp_buffer.resize(guest_memory_size);
255 host_ptr = tmp_buffer.data(); 255 host_ptr = tmp_buffer.data();
256 if (must_read_current_data) { 256
257 if (params.target == SurfaceTarget::Texture3D) {
258 // Special case for 3D texture segments
257 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); 259 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
258 } 260 }
259 261
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 79e10ffbb..173f2edba 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -217,8 +217,8 @@ public:
217 } 217 }
218 218
219 bool IsProtected() const { 219 bool IsProtected() const {
220 // Only 3D Slices are to be protected 220 // Only 3D slices are to be protected
221 return is_target && params.block_depth > 0; 221 return is_target && params.target == SurfaceTarget::Texture3D;
222 } 222 }
223 223
224 bool IsRenderTarget() const { 224 bool IsRenderTarget() const {
@@ -250,6 +250,11 @@ public:
250 return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); 250 return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
251 } 251 }
252 252
253 TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) {
254 return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth,
255 base_level, num_levels));
256 }
257
253 std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params, 258 std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
254 const GPUVAddr view_addr, 259 const GPUVAddr view_addr,
255 const std::size_t candidate_size, const u32 mipmap, 260 const std::size_t candidate_size, const u32 mipmap,
@@ -272,8 +277,8 @@ public:
272 std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, 277 std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
273 const std::size_t candidate_size) { 278 const std::size_t candidate_size) {
274 if (params.target == SurfaceTarget::Texture3D || 279 if (params.target == SurfaceTarget::Texture3D ||
275 (params.num_levels == 1 && !params.is_layered) || 280 view_params.target == SurfaceTarget::Texture3D ||
276 view_params.target == SurfaceTarget::Texture3D) { 281 (params.num_levels == 1 && !params.is_layered)) {
277 return {}; 282 return {};
278 } 283 }
279 const auto layer_mipmap{GetLayerMipmap(view_addr)}; 284 const auto layer_mipmap{GetLayerMipmap(view_addr)};
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 884fabffe..0b2b2b8c4 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -215,10 +215,19 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz
215 params.num_levels = 1; 215 params.num_levels = 1;
216 params.emulated_levels = 1; 216 params.emulated_levels = 1;
217 217
218 const bool is_layered = config.layers > 1 && params.block_depth == 0; 218 if (config.memory_layout.is_3d != 0) {
219 params.is_layered = is_layered; 219 params.depth = config.layers.Value();
220 params.depth = is_layered ? config.layers.Value() : 1; 220 params.is_layered = false;
221 params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; 221 params.target = SurfaceTarget::Texture3D;
222 } else if (config.layers > 1) {
223 params.depth = config.layers.Value();
224 params.is_layered = true;
225 params.target = SurfaceTarget::Texture2DArray;
226 } else {
227 params.depth = 1;
228 params.is_layered = false;
229 params.target = SurfaceTarget::Texture2D;
230 }
222 return params; 231 return params;
223} 232}
224 233
@@ -237,7 +246,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
237 params.width = config.width; 246 params.width = config.width;
238 params.height = config.height; 247 params.height = config.height;
239 params.pitch = config.pitch; 248 params.pitch = config.pitch;
240 // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters 249 // TODO(Rodrigo): Try to guess texture arrays from parameters
241 params.target = SurfaceTarget::Texture2D; 250 params.target = SurfaceTarget::Texture2D;
242 params.depth = 1; 251 params.depth = 1;
243 params.num_levels = 1; 252 params.num_levels = 1;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 8bfc541d4..85075e868 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -14,6 +14,7 @@
14#include <unordered_map> 14#include <unordered_map>
15#include <vector> 15#include <vector>
16 16
17#include <boost/container/small_vector.hpp>
17#include <boost/icl/interval_map.hpp> 18#include <boost/icl/interval_map.hpp>
18#include <boost/range/iterator_range.hpp> 19#include <boost/range/iterator_range.hpp>
19 20
@@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
53 54
54template <typename TSurface, typename TView> 55template <typename TSurface, typename TView>
55class TextureCache { 56class TextureCache {
57 using VectorSurface = boost::container::small_vector<TSurface, 1>;
56 58
57public: 59public:
58 void InvalidateRegion(VAddr addr, std::size_t size) { 60 void InvalidateRegion(VAddr addr, std::size_t size) {
@@ -296,30 +298,30 @@ public:
296 const GPUVAddr src_gpu_addr = src_config.Address(); 298 const GPUVAddr src_gpu_addr = src_config.Address();
297 const GPUVAddr dst_gpu_addr = dst_config.Address(); 299 const GPUVAddr dst_gpu_addr = dst_config.Address();
298 DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); 300 DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
299 const std::optional<VAddr> dst_cpu_addr = 301
300 system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr); 302 const auto& memory_manager = system.GPU().MemoryManager();
301 const std::optional<VAddr> src_cpu_addr = 303 const std::optional<VAddr> dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr);
302 system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr); 304 const std::optional<VAddr> src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr);
303 std::pair<TSurface, TView> dst_surface = 305 std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
304 GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); 306 TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
305 std::pair<TSurface, TView> src_surface = 307 ImageBlit(src_surface, dst_surface.second, copy_config);
306 GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);
307 ImageBlit(src_surface.second, dst_surface.second, copy_config);
308 dst_surface.first->MarkAsModified(true, Tick()); 308 dst_surface.first->MarkAsModified(true, Tick());
309 } 309 }
310 310
311 TSurface TryFindFramebufferSurface(VAddr addr) { 311 TSurface TryFindFramebufferSurface(VAddr addr) const {
312 if (!addr) { 312 if (!addr) {
313 return nullptr; 313 return nullptr;
314 } 314 }
315 const VAddr page = addr >> registry_page_bits; 315 const VAddr page = addr >> registry_page_bits;
316 std::vector<TSurface>& list = registry[page]; 316 const auto it = registry.find(page);
317 for (auto& surface : list) { 317 if (it == registry.end()) {
318 if (surface->GetCpuAddr() == addr) { 318 return nullptr;
319 return surface;
320 }
321 } 319 }
322 return nullptr; 320 const auto& list = it->second;
321 const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
322 return surface->GetCpuAddr() == addr;
323 });
324 return found != list.end() ? *found : nullptr;
323 } 325 }
324 326
325 u64 Tick() { 327 u64 Tick() {
@@ -498,18 +500,18 @@ private:
498 * @param untopological Indicates to the recycler that the texture has no way 500 * @param untopological Indicates to the recycler that the texture has no way
499 * to match the overlaps due to topological reasons. 501 * to match the overlaps due to topological reasons.
500 **/ 502 **/
501 RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, 503 RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
502 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { 504 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
503 if (Settings::IsGPULevelExtreme()) { 505 if (Settings::IsGPULevelExtreme()) {
504 return RecycleStrategy::Flush; 506 return RecycleStrategy::Flush;
505 } 507 }
506 // 3D Textures decision 508 // 3D Textures decision
507 if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { 509 if (params.target == SurfaceTarget::Texture3D) {
508 return RecycleStrategy::Flush; 510 return RecycleStrategy::Flush;
509 } 511 }
510 for (const auto& s : overlaps) { 512 for (const auto& s : overlaps) {
511 const auto& s_params = s->GetSurfaceParams(); 513 const auto& s_params = s->GetSurfaceParams();
512 if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { 514 if (s_params.target == SurfaceTarget::Texture3D) {
513 return RecycleStrategy::Flush; 515 return RecycleStrategy::Flush;
514 } 516 }
515 } 517 }
@@ -538,9 +540,8 @@ private:
538 * @param untopological Indicates to the recycler that the texture has no way to match the 540 * @param untopological Indicates to the recycler that the texture has no way to match the
539 * overlaps due to topological reasons. 541 * overlaps due to topological reasons.
540 **/ 542 **/
541 std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, 543 std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
542 const SurfaceParams& params, const GPUVAddr gpu_addr, 544 const GPUVAddr gpu_addr, const bool preserve_contents,
543 const bool preserve_contents,
544 const MatchTopologyResult untopological) { 545 const MatchTopologyResult untopological) {
545 const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); 546 const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
546 for (auto& surface : overlaps) { 547 for (auto& surface : overlaps) {
@@ -650,47 +651,65 @@ private:
650 * @param params The parameters on the new surface. 651 * @param params The parameters on the new surface.
651 * @param gpu_addr The starting address of the new surface. 652 * @param gpu_addr The starting address of the new surface.
652 **/ 653 **/
653 std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, 654 std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
654 const SurfaceParams& params, 655 const SurfaceParams& params,
655 const GPUVAddr gpu_addr) { 656 GPUVAddr gpu_addr) {
656 if (params.target == SurfaceTarget::Texture3D) { 657 if (params.target == SurfaceTarget::Texture3D) {
657 return {}; 658 return std::nullopt;
658 } 659 }
659 bool modified = false; 660 const auto test_modified = [](TSurface& surface) { return surface->IsModified(); };
660 TSurface new_surface = GetUncachedSurface(gpu_addr, params); 661 TSurface new_surface = GetUncachedSurface(gpu_addr, params);
661 u32 passed_tests = 0; 662
663 if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) {
664 LoadSurface(new_surface);
665 for (const auto& surface : overlaps) {
666 Unregister(surface);
667 }
668 Register(new_surface);
669 return {{new_surface, new_surface->GetMainView()}};
670 }
671
672 std::size_t passed_tests = 0;
662 for (auto& surface : overlaps) { 673 for (auto& surface : overlaps) {
663 const SurfaceParams& src_params = surface->GetSurfaceParams(); 674 const SurfaceParams& src_params = surface->GetSurfaceParams();
664 if (src_params.is_layered || src_params.num_levels > 1) { 675 const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
665 // We send this cases to recycle as they are more complex to handle
666 return {};
667 }
668 const std::size_t candidate_size = surface->GetSizeInBytes();
669 auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
670 if (!mipmap_layer) { 676 if (!mipmap_layer) {
671 continue; 677 continue;
672 } 678 }
673 const auto [layer, mipmap] = *mipmap_layer; 679 const auto [base_layer, base_mipmap] = *mipmap_layer;
674 if (new_surface->GetMipmapSize(mipmap) != candidate_size) { 680 if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) {
675 continue; 681 continue;
676 } 682 }
677 modified |= surface->IsModified(); 683 ++passed_tests;
678 // Now we got all the data set up 684
679 const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); 685 // Copy all mipmaps and layers
680 const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); 686 const u32 block_width = params.GetDefaultBlockWidth();
681 const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1); 687 const u32 block_height = params.GetDefaultBlockHeight();
682 passed_tests++; 688 for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) {
683 ImageCopy(surface, new_surface, copy_params); 689 const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
690 const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
691 if (width < block_width || height < block_height) {
692 // Current APIs forbid copying small compressed textures, avoid errors
693 break;
694 }
695 const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height,
696 src_params.depth);
697 ImageCopy(surface, new_surface, copy_params);
698 }
684 } 699 }
685 if (passed_tests == 0) { 700 if (passed_tests == 0) {
686 return {}; 701 return std::nullopt;
702 }
703 if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
687 // In Accurate GPU all tests should pass, else we recycle 704 // In Accurate GPU all tests should pass, else we recycle
688 } else if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { 705 return std::nullopt;
689 return {};
690 } 706 }
707
708 const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified);
691 for (const auto& surface : overlaps) { 709 for (const auto& surface : overlaps) {
692 Unregister(surface); 710 Unregister(surface);
693 } 711 }
712
694 new_surface->MarkAsModified(modified, Tick()); 713 new_surface->MarkAsModified(modified, Tick());
695 Register(new_surface); 714 Register(new_surface);
696 return {{new_surface, new_surface->GetMainView()}}; 715 return {{new_surface, new_surface->GetMainView()}};
@@ -708,53 +727,11 @@ private:
708 * @param preserve_contents Indicates that the new surface should be loaded from memory or 727 * @param preserve_contents Indicates that the new surface should be loaded from memory or
709 * left blank. 728 * left blank.
710 */ 729 */
711 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, 730 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
712 const SurfaceParams& params, 731 const SurfaceParams& params,
713 const GPUVAddr gpu_addr, 732 GPUVAddr gpu_addr, VAddr cpu_addr,
714 const VAddr cpu_addr,
715 bool preserve_contents) { 733 bool preserve_contents) {
716 if (params.target == SurfaceTarget::Texture3D) { 734 if (params.target != SurfaceTarget::Texture3D) {
717 bool failed = false;
718 if (params.num_levels > 1) {
719 // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
720 return std::nullopt;
721 }
722 TSurface new_surface = GetUncachedSurface(gpu_addr, params);
723 bool modified = false;
724 for (auto& surface : overlaps) {
725 const SurfaceParams& src_params = surface->GetSurfaceParams();
726 if (src_params.target != SurfaceTarget::Texture2D) {
727 failed = true;
728 break;
729 }
730 if (src_params.height != params.height) {
731 failed = true;
732 break;
733 }
734 if (src_params.block_depth != params.block_depth ||
735 src_params.block_height != params.block_height) {
736 failed = true;
737 break;
738 }
739 const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
740 const auto offsets = params.GetBlockOffsetXYZ(offset);
741 const auto z = std::get<2>(offsets);
742 modified |= surface->IsModified();
743 const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
744 1);
745 ImageCopy(surface, new_surface, copy_params);
746 }
747 if (failed) {
748 return std::nullopt;
749 }
750 for (const auto& surface : overlaps) {
751 Unregister(surface);
752 }
753 new_surface->MarkAsModified(modified, Tick());
754 Register(new_surface);
755 auto view = new_surface->GetMainView();
756 return {{std::move(new_surface), view}};
757 } else {
758 for (const auto& surface : overlaps) { 735 for (const auto& surface : overlaps) {
759 if (!surface->MatchTarget(params.target)) { 736 if (!surface->MatchTarget(params.target)) {
760 if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { 737 if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
@@ -770,11 +747,60 @@ private:
770 continue; 747 continue;
771 } 748 }
772 if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { 749 if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
773 return {{surface, surface->GetMainView()}}; 750 return std::make_pair(surface, surface->GetMainView());
774 } 751 }
775 } 752 }
776 return InitializeSurface(gpu_addr, params, preserve_contents); 753 return InitializeSurface(gpu_addr, params, preserve_contents);
777 } 754 }
755
756 if (params.num_levels > 1) {
757 // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
758 return std::nullopt;
759 }
760
761 if (overlaps.size() == 1) {
762 const auto& surface = overlaps[0];
763 const SurfaceParams& overlap_params = surface->GetSurfaceParams();
764 // Don't attempt to render to textures with more than one level for now
765 // The texture has to be to the right or the sample address if we want to render to it
766 if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) {
767 const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr());
768 const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
769 if (slice < overlap_params.depth) {
770 auto view = surface->Emplace3DView(slice, params.depth, 0, 1);
771 return std::make_pair(std::move(surface), std::move(view));
772 }
773 }
774 }
775
776 TSurface new_surface = GetUncachedSurface(gpu_addr, params);
777 bool modified = false;
778
779 for (auto& surface : overlaps) {
780 const SurfaceParams& src_params = surface->GetSurfaceParams();
781 if (src_params.target != SurfaceTarget::Texture2D ||
782 src_params.height != params.height ||
783 src_params.block_depth != params.block_depth ||
784 src_params.block_height != params.block_height) {
785 return std::nullopt;
786 }
787 modified |= surface->IsModified();
788
789 const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
790 const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset));
791 const u32 width = params.width;
792 const u32 height = params.height;
793 const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1);
794 ImageCopy(surface, new_surface, copy_params);
795 }
796 for (const auto& surface : overlaps) {
797 Unregister(surface);
798 }
799 new_surface->MarkAsModified(modified, Tick());
800 Register(new_surface);
801
802 TView view = new_surface->GetMainView();
803 return std::make_pair(std::move(new_surface), std::move(view));
778 } 804 }
779 805
780 /** 806 /**
@@ -810,7 +836,7 @@ private:
810 TSurface& current_surface = iter->second; 836 TSurface& current_surface = iter->second;
811 const auto topological_result = current_surface->MatchesTopology(params); 837 const auto topological_result = current_surface->MatchesTopology(params);
812 if (topological_result != MatchTopologyResult::FullMatch) { 838 if (topological_result != MatchTopologyResult::FullMatch) {
813 std::vector<TSurface> overlaps{current_surface}; 839 VectorSurface overlaps{current_surface};
814 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, 840 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
815 topological_result); 841 topological_result);
816 } 842 }
@@ -852,7 +878,7 @@ private:
852 } 878 }
853 } 879 }
854 880
855 // Check if it's a 3D texture 881 // Manage 3D textures
856 if (params.block_depth > 0) { 882 if (params.block_depth > 0) {
857 auto surface = 883 auto surface =
858 Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); 884 Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents);
@@ -868,12 +894,9 @@ private:
868 // two things either the candidate surface is a supertexture of the overlap 894 // two things either the candidate surface is a supertexture of the overlap
869 // or they don't match in any known way. 895 // or they don't match in any known way.
870 if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { 896 if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
871 if (current_surface->GetGpuAddr() == gpu_addr) { 897 const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr);
872 std::optional<std::pair<TSurface, TView>> view = 898 if (view) {
873 TryReconstructSurface(overlaps, params, gpu_addr); 899 return *view;
874 if (view) {
875 return *view;
876 }
877 } 900 }
878 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, 901 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
879 MatchTopologyResult::FullMatch); 902 MatchTopologyResult::FullMatch);
@@ -1030,7 +1053,7 @@ private:
1030 void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, 1053 void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
1031 const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { 1054 const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
1032 auto deduced_src = DeduceSurface(src_gpu_addr, src_params); 1055 auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
1033 auto deduced_dst = DeduceSurface(src_gpu_addr, src_params); 1056 auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params);
1034 if (deduced_src.Failed() || deduced_dst.Failed()) { 1057 if (deduced_src.Failed() || deduced_dst.Failed()) {
1035 return; 1058 return;
1036 } 1059 }
@@ -1126,23 +1149,25 @@ private:
1126 } 1149 }
1127 } 1150 }
1128 1151
1129 std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { 1152 VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
1130 if (size == 0) { 1153 if (size == 0) {
1131 return {}; 1154 return {};
1132 } 1155 }
1133 const VAddr cpu_addr_end = cpu_addr + size; 1156 const VAddr cpu_addr_end = cpu_addr + size;
1134 VAddr start = cpu_addr >> registry_page_bits;
1135 const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; 1157 const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
1136 std::vector<TSurface> surfaces; 1158 VectorSurface surfaces;
1137 while (start <= end) { 1159 for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
1138 std::vector<TSurface>& list = registry[start]; 1160 const auto it = registry.find(start);
1139 for (auto& surface : list) { 1161 if (it == registry.end()) {
1140 if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { 1162 continue;
1141 surface->MarkAsPicked(true); 1163 }
1142 surfaces.push_back(surface); 1164 for (auto& surface : it->second) {
1165 if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) {
1166 continue;
1143 } 1167 }
1168 surface->MarkAsPicked(true);
1169 surfaces.push_back(surface);
1144 } 1170 }
1145 start++;
1146 } 1171 }
1147 for (auto& surface : surfaces) { 1172 for (auto& surface : surfaces) {
1148 surface->MarkAsPicked(false); 1173 surface->MarkAsPicked(false);
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index 8b9404718..75c27e39e 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -208,6 +208,10 @@ if (MSVC)
208 copy_yuzu_unicorn_deps(yuzu) 208 copy_yuzu_unicorn_deps(yuzu)
209endif() 209endif()
210 210
211if (NOT APPLE)
212 target_compile_definitions(yuzu PRIVATE HAS_OPENGL)
213endif()
214
211if (ENABLE_VULKAN) 215if (ENABLE_VULKAN)
212 target_include_directories(yuzu PRIVATE ../../externals/Vulkan-Headers/include) 216 target_include_directories(yuzu PRIVATE ../../externals/Vulkan-Headers/include)
213 target_compile_definitions(yuzu PRIVATE HAS_VULKAN) 217 target_compile_definitions(yuzu PRIVATE HAS_VULKAN)
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 1adf8932b..bfeb16458 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -8,13 +8,16 @@
8#include <QHBoxLayout> 8#include <QHBoxLayout>
9#include <QKeyEvent> 9#include <QKeyEvent>
10#include <QMessageBox> 10#include <QMessageBox>
11#include <QOffscreenSurface>
12#include <QOpenGLContext>
13#include <QPainter> 11#include <QPainter>
14#include <QScreen> 12#include <QScreen>
15#include <QStringList> 13#include <QStringList>
16#include <QWindow> 14#include <QWindow>
17 15
16#ifdef HAS_OPENGL
17#include <QOffscreenSurface>
18#include <QOpenGLContext>
19#endif
20
18#if !defined(WIN32) && HAS_VULKAN 21#if !defined(WIN32) && HAS_VULKAN
19#include <qpa/qplatformnativeinterface.h> 22#include <qpa/qplatformnativeinterface.h>
20#endif 23#endif
@@ -98,6 +101,7 @@ void EmuThread::run() {
98#endif 101#endif
99} 102}
100 103
104#ifdef HAS_OPENGL
101class OpenGLSharedContext : public Core::Frontend::GraphicsContext { 105class OpenGLSharedContext : public Core::Frontend::GraphicsContext {
102public: 106public:
103 /// Create the original context that should be shared from 107 /// Create the original context that should be shared from
@@ -106,6 +110,9 @@ public:
106 format.setVersion(4, 3); 110 format.setVersion(4, 3);
107 format.setProfile(QSurfaceFormat::CompatibilityProfile); 111 format.setProfile(QSurfaceFormat::CompatibilityProfile);
108 format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); 112 format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions);
113 if (Settings::values.renderer_debug) {
114 format.setOption(QSurfaceFormat::FormatOption::DebugContext);
115 }
109 // TODO: expose a setting for buffer value (ie default/single/double/triple) 116 // TODO: expose a setting for buffer value (ie default/single/double/triple)
110 format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); 117 format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior);
111 format.setSwapInterval(0); 118 format.setSwapInterval(0);
@@ -180,6 +187,7 @@ private:
180 std::unique_ptr<QOffscreenSurface> offscreen_surface{}; 187 std::unique_ptr<QOffscreenSurface> offscreen_surface{};
181 QSurface* surface; 188 QSurface* surface;
182}; 189};
190#endif
183 191
184class DummyContext : public Core::Frontend::GraphicsContext {}; 192class DummyContext : public Core::Frontend::GraphicsContext {};
185 193
@@ -352,7 +360,7 @@ QByteArray GRenderWindow::saveGeometry() {
352} 360}
353 361
354qreal GRenderWindow::windowPixelRatio() const { 362qreal GRenderWindow::windowPixelRatio() const {
355 return devicePixelRatio(); 363 return devicePixelRatioF();
356} 364}
357 365
358std::pair<u32, u32> GRenderWindow::ScaleTouch(const QPointF& pos) const { 366std::pair<u32, u32> GRenderWindow::ScaleTouch(const QPointF& pos) const {
@@ -470,6 +478,7 @@ void GRenderWindow::resizeEvent(QResizeEvent* event) {
470} 478}
471 479
472std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedContext() const { 480std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedContext() const {
481#ifdef HAS_OPENGL
473 if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) { 482 if (Settings::values.renderer_backend == Settings::RendererBackend::OpenGL) {
474 auto c = static_cast<OpenGLSharedContext*>(main_context.get()); 483 auto c = static_cast<OpenGLSharedContext*>(main_context.get());
475 // Bind the shared contexts to the main surface in case the backend wants to take over 484 // Bind the shared contexts to the main surface in case the backend wants to take over
@@ -477,6 +486,7 @@ std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedCont
477 return std::make_unique<OpenGLSharedContext>(c->GetShareContext(), 486 return std::make_unique<OpenGLSharedContext>(c->GetShareContext(),
478 child_widget->windowHandle()); 487 child_widget->windowHandle());
479 } 488 }
489#endif
480 return std::make_unique<DummyContext>(); 490 return std::make_unique<DummyContext>();
481} 491}
482 492
@@ -557,6 +567,7 @@ void GRenderWindow::OnMinimalClientAreaChangeRequest(std::pair<u32, u32> minimal
557} 567}
558 568
559bool GRenderWindow::InitializeOpenGL() { 569bool GRenderWindow::InitializeOpenGL() {
570#ifdef HAS_OPENGL
560 // TODO: One of these flags might be interesting: WA_OpaquePaintEvent, WA_NoBackground, 571 // TODO: One of these flags might be interesting: WA_OpaquePaintEvent, WA_NoBackground,
561 // WA_DontShowOnScreen, WA_DeleteOnClose 572 // WA_DontShowOnScreen, WA_DeleteOnClose
562 auto child = new OpenGLRenderWidget(this); 573 auto child = new OpenGLRenderWidget(this);
@@ -568,6 +579,11 @@ bool GRenderWindow::InitializeOpenGL() {
568 std::make_unique<OpenGLSharedContext>(context->GetShareContext(), child->windowHandle())); 579 std::make_unique<OpenGLSharedContext>(context->GetShareContext(), child->windowHandle()));
569 580
570 return true; 581 return true;
582#else
583 QMessageBox::warning(this, tr("OpenGL not available!"),
584 tr("yuzu has not been compiled with OpenGL support."));
585 return false;
586#endif
571} 587}
572 588
573bool GRenderWindow::InitializeVulkan() { 589bool GRenderWindow::InitializeVulkan() {
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index b08b87426..bbbd96113 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -211,7 +211,7 @@ const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> Config::default
211// This must be in alphabetical order according to action name as it must have the same order as 211// This must be in alphabetical order according to action name as it must have the same order as
212// UISetting::values.shortcuts, which is alphabetically ordered. 212// UISetting::values.shortcuts, which is alphabetically ordered.
213// clang-format off 213// clang-format off
214const std::array<UISettings::Shortcut, 15> Config::default_hotkeys{{ 214const std::array<UISettings::Shortcut, 16> Config::default_hotkeys{{
215 {QStringLiteral("Capture Screenshot"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+P"), Qt::ApplicationShortcut}}, 215 {QStringLiteral("Capture Screenshot"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+P"), Qt::ApplicationShortcut}},
216 {QStringLiteral("Change Docked Mode"), QStringLiteral("Main Window"), {QStringLiteral("F10"), Qt::ApplicationShortcut}}, 216 {QStringLiteral("Change Docked Mode"), QStringLiteral("Main Window"), {QStringLiteral("F10"), Qt::ApplicationShortcut}},
217 {QStringLiteral("Continue/Pause Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F4"), Qt::WindowShortcut}}, 217 {QStringLiteral("Continue/Pause Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F4"), Qt::WindowShortcut}},
@@ -222,6 +222,7 @@ const std::array<UISettings::Shortcut, 15> Config::default_hotkeys{{
222 {QStringLiteral("Increase Speed Limit"), QStringLiteral("Main Window"), {QStringLiteral("+"), Qt::ApplicationShortcut}}, 222 {QStringLiteral("Increase Speed Limit"), QStringLiteral("Main Window"), {QStringLiteral("+"), Qt::ApplicationShortcut}},
223 {QStringLiteral("Load Amiibo"), QStringLiteral("Main Window"), {QStringLiteral("F2"), Qt::ApplicationShortcut}}, 223 {QStringLiteral("Load Amiibo"), QStringLiteral("Main Window"), {QStringLiteral("F2"), Qt::ApplicationShortcut}},
224 {QStringLiteral("Load File"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+O"), Qt::WindowShortcut}}, 224 {QStringLiteral("Load File"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+O"), Qt::WindowShortcut}},
225 {QStringLiteral("Mute Audio"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+M"), Qt::WindowShortcut}},
225 {QStringLiteral("Restart Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F6"), Qt::WindowShortcut}}, 226 {QStringLiteral("Restart Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F6"), Qt::WindowShortcut}},
226 {QStringLiteral("Stop Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F5"), Qt::WindowShortcut}}, 227 {QStringLiteral("Stop Emulation"), QStringLiteral("Main Window"), {QStringLiteral("F5"), Qt::WindowShortcut}},
227 {QStringLiteral("Toggle Filter Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F"), Qt::WindowShortcut}}, 228 {QStringLiteral("Toggle Filter Bar"), QStringLiteral("Main Window"), {QStringLiteral("Ctrl+F"), Qt::WindowShortcut}},
@@ -533,6 +534,8 @@ void Config::ReadDebuggingValues() {
533 Settings::values.quest_flag = ReadSetting(QStringLiteral("quest_flag"), false).toBool(); 534 Settings::values.quest_flag = ReadSetting(QStringLiteral("quest_flag"), false).toBool();
534 Settings::values.disable_cpu_opt = 535 Settings::values.disable_cpu_opt =
535 ReadSetting(QStringLiteral("disable_cpu_opt"), false).toBool(); 536 ReadSetting(QStringLiteral("disable_cpu_opt"), false).toBool();
537 Settings::values.disable_macro_jit =
538 ReadSetting(QStringLiteral("disable_macro_jit"), false).toBool();
536 539
537 qt_config->endGroup(); 540 qt_config->endGroup();
538} 541}
@@ -629,13 +632,11 @@ void Config::ReadRendererValues() {
629 static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt()); 632 static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt());
630 Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool(); 633 Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool();
631 Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt(); 634 Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt();
632 Settings::values.resolution_factor =
633 ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat();
634 Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt(); 635 Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt();
635 Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt(); 636 Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt();
636 Settings::values.use_frame_limit = 637 Settings::values.use_frame_limit =
637 ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); 638 ReadSetting(QStringLiteral("use_frame_limit"), true).toBool();
638 Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); 639 Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toUInt();
639 Settings::values.use_disk_shader_cache = 640 Settings::values.use_disk_shader_cache =
640 ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool(); 641 ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool();
641 const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt(); 642 const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt();
@@ -720,8 +721,6 @@ void Config::ReadUIValues() {
720 .toString(); 721 .toString();
721 UISettings::values.enable_discord_presence = 722 UISettings::values.enable_discord_presence =
722 ReadSetting(QStringLiteral("enable_discord_presence"), true).toBool(); 723 ReadSetting(QStringLiteral("enable_discord_presence"), true).toBool();
723 UISettings::values.screenshot_resolution_factor =
724 static_cast<u16>(ReadSetting(QStringLiteral("screenshot_resolution_factor"), 0).toUInt());
725 UISettings::values.select_user_on_boot = 724 UISettings::values.select_user_on_boot =
726 ReadSetting(QStringLiteral("select_user_on_boot"), false).toBool(); 725 ReadSetting(QStringLiteral("select_user_on_boot"), false).toBool();
727 726
@@ -1011,6 +1010,7 @@ void Config::SaveDebuggingValues() {
1011 WriteSetting(QStringLiteral("dump_nso"), Settings::values.dump_nso, false); 1010 WriteSetting(QStringLiteral("dump_nso"), Settings::values.dump_nso, false);
1012 WriteSetting(QStringLiteral("quest_flag"), Settings::values.quest_flag, false); 1011 WriteSetting(QStringLiteral("quest_flag"), Settings::values.quest_flag, false);
1013 WriteSetting(QStringLiteral("disable_cpu_opt"), Settings::values.disable_cpu_opt, false); 1012 WriteSetting(QStringLiteral("disable_cpu_opt"), Settings::values.disable_cpu_opt, false);
1013 WriteSetting(QStringLiteral("disable_macro_jit"), Settings::values.disable_macro_jit, false);
1014 1014
1015 qt_config->endGroup(); 1015 qt_config->endGroup();
1016} 1016}
@@ -1079,8 +1079,6 @@ void Config::SaveRendererValues() {
1079 WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0); 1079 WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0);
1080 WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false); 1080 WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false);
1081 WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0); 1081 WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0);
1082 WriteSetting(QStringLiteral("resolution_factor"),
1083 static_cast<double>(Settings::values.resolution_factor), 1.0);
1084 WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0); 1082 WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0);
1085 WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0); 1083 WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0);
1086 WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); 1084 WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true);
@@ -1156,8 +1154,6 @@ void Config::SaveUIValues() {
1156 QString::fromUtf8(UISettings::themes[0].second)); 1154 QString::fromUtf8(UISettings::themes[0].second));
1157 WriteSetting(QStringLiteral("enable_discord_presence"), 1155 WriteSetting(QStringLiteral("enable_discord_presence"),
1158 UISettings::values.enable_discord_presence, true); 1156 UISettings::values.enable_discord_presence, true);
1159 WriteSetting(QStringLiteral("screenshot_resolution_factor"),
1160 UISettings::values.screenshot_resolution_factor, 0);
1161 WriteSetting(QStringLiteral("select_user_on_boot"), UISettings::values.select_user_on_boot, 1157 WriteSetting(QStringLiteral("select_user_on_boot"), UISettings::values.select_user_on_boot,
1162 false); 1158 false);
1163 1159
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index 5cd2a5feb..09316382c 100644
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -27,7 +27,7 @@ public:
27 default_mouse_buttons; 27 default_mouse_buttons;
28 static const std::array<int, Settings::NativeKeyboard::NumKeyboardKeys> default_keyboard_keys; 28 static const std::array<int, Settings::NativeKeyboard::NumKeyboardKeys> default_keyboard_keys;
29 static const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> default_keyboard_mods; 29 static const std::array<int, Settings::NativeKeyboard::NumKeyboardMods> default_keyboard_mods;
30 static const std::array<UISettings::Shortcut, 15> default_hotkeys; 30 static const std::array<UISettings::Shortcut, 16> default_hotkeys;
31 31
32private: 32private:
33 void ReadValues(); 33 void ReadValues();
diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp
index c2026763e..2c77441fd 100644
--- a/src/yuzu/configuration/configure_debug.cpp
+++ b/src/yuzu/configuration/configure_debug.cpp
@@ -39,6 +39,8 @@ void ConfigureDebug::SetConfiguration() {
39 ui->disable_cpu_opt->setChecked(Settings::values.disable_cpu_opt); 39 ui->disable_cpu_opt->setChecked(Settings::values.disable_cpu_opt);
40 ui->enable_graphics_debugging->setEnabled(!Core::System::GetInstance().IsPoweredOn()); 40 ui->enable_graphics_debugging->setEnabled(!Core::System::GetInstance().IsPoweredOn());
41 ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug); 41 ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug);
42 ui->disable_macro_jit->setEnabled(!Core::System::GetInstance().IsPoweredOn());
43 ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit);
42} 44}
43 45
44void ConfigureDebug::ApplyConfiguration() { 46void ConfigureDebug::ApplyConfiguration() {
@@ -51,6 +53,7 @@ void ConfigureDebug::ApplyConfiguration() {
51 Settings::values.quest_flag = ui->quest_flag->isChecked(); 53 Settings::values.quest_flag = ui->quest_flag->isChecked();
52 Settings::values.disable_cpu_opt = ui->disable_cpu_opt->isChecked(); 54 Settings::values.disable_cpu_opt = ui->disable_cpu_opt->isChecked();
53 Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); 55 Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked();
56 Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked();
54 Debugger::ToggleConsole(); 57 Debugger::ToggleConsole();
55 Log::Filter filter; 58 Log::Filter filter;
56 filter.ParseFilterString(Settings::values.log_filter); 59 filter.ParseFilterString(Settings::values.log_filter);
diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui
index e0d4c4a44..46f0208c6 100644
--- a/src/yuzu/configuration/configure_debug.ui
+++ b/src/yuzu/configuration/configure_debug.ui
@@ -148,6 +148,19 @@
148 </property> 148 </property>
149 </widget> 149 </widget>
150 </item> 150 </item>
151 <item>
152 <widget class="QCheckBox" name="disable_macro_jit">
153 <property name="enabled">
154 <bool>true</bool>
155 </property>
156 <property name="whatsThis">
 157 <string>When checked, it disables the macro Just In Time compiler. Enabling this makes games run slower</string>
158 </property>
159 <property name="text">
160 <string>Disable Macro JIT</string>
161 </property>
162 </widget>
163 </item>
151 </layout> 164 </layout>
152 </widget> 165 </widget>
153 </item> 166 </item>
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index ea667caef..304625cd7 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -19,47 +19,6 @@
19#include "video_core/renderer_vulkan/renderer_vulkan.h" 19#include "video_core/renderer_vulkan/renderer_vulkan.h"
20#endif 20#endif
21 21
22namespace {
23enum class Resolution : int {
24 Auto,
25 Scale1x,
26 Scale2x,
27 Scale3x,
28 Scale4x,
29};
30
31float ToResolutionFactor(Resolution option) {
32 switch (option) {
33 case Resolution::Auto:
34 return 0.f;
35 case Resolution::Scale1x:
36 return 1.f;
37 case Resolution::Scale2x:
38 return 2.f;
39 case Resolution::Scale3x:
40 return 3.f;
41 case Resolution::Scale4x:
42 return 4.f;
43 }
44 return 0.f;
45}
46
47Resolution FromResolutionFactor(float factor) {
48 if (factor == 0.f) {
49 return Resolution::Auto;
50 } else if (factor == 1.f) {
51 return Resolution::Scale1x;
52 } else if (factor == 2.f) {
53 return Resolution::Scale2x;
54 } else if (factor == 3.f) {
55 return Resolution::Scale3x;
56 } else if (factor == 4.f) {
57 return Resolution::Scale4x;
58 }
59 return Resolution::Auto;
60}
61} // Anonymous namespace
62
63ConfigureGraphics::ConfigureGraphics(QWidget* parent) 22ConfigureGraphics::ConfigureGraphics(QWidget* parent)
64 : QWidget(parent), ui(new Ui::ConfigureGraphics) { 23 : QWidget(parent), ui(new Ui::ConfigureGraphics) {
65 vulkan_device = Settings::values.vulkan_device; 24 vulkan_device = Settings::values.vulkan_device;
@@ -99,8 +58,6 @@ void ConfigureGraphics::SetConfiguration() {
99 58
100 ui->api->setEnabled(runtime_lock); 59 ui->api->setEnabled(runtime_lock);
101 ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend)); 60 ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend));
102 ui->resolution_factor_combobox->setCurrentIndex(
103 static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
104 ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio); 61 ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio);
105 ui->use_disk_shader_cache->setEnabled(runtime_lock); 62 ui->use_disk_shader_cache->setEnabled(runtime_lock);
106 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); 63 ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache);
@@ -114,8 +71,6 @@ void ConfigureGraphics::SetConfiguration() {
114void ConfigureGraphics::ApplyConfiguration() { 71void ConfigureGraphics::ApplyConfiguration() {
115 Settings::values.renderer_backend = GetCurrentGraphicsBackend(); 72 Settings::values.renderer_backend = GetCurrentGraphicsBackend();
116 Settings::values.vulkan_device = vulkan_device; 73 Settings::values.vulkan_device = vulkan_device;
117 Settings::values.resolution_factor =
118 ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
119 Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex(); 74 Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex();
120 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); 75 Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked();
121 Settings::values.use_asynchronous_gpu_emulation = 76 Settings::values.use_asynchronous_gpu_emulation =
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index c816d6108..6e75447a5 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -85,46 +85,6 @@
85 </widget> 85 </widget>
86 </item> 86 </item>
87 <item> 87 <item>
88 <layout class="QHBoxLayout" name="horizontalLayout_2">
89 <item>
90 <widget class="QLabel" name="label">
91 <property name="text">
92 <string>Internal Resolution:</string>
93 </property>
94 </widget>
95 </item>
96 <item>
97 <widget class="QComboBox" name="resolution_factor_combobox">
98 <item>
99 <property name="text">
100 <string>Auto (Window Size)</string>
101 </property>
102 </item>
103 <item>
104 <property name="text">
105 <string>Native (1280x720)</string>
106 </property>
107 </item>
108 <item>
109 <property name="text">
110 <string>2x Native (2560x1440)</string>
111 </property>
112 </item>
113 <item>
114 <property name="text">
115 <string>3x Native (3840x2160)</string>
116 </property>
117 </item>
118 <item>
119 <property name="text">
120 <string>4x Native (5120x2880)</string>
121 </property>
122 </item>
123 </widget>
124 </item>
125 </layout>
126 </item>
127 <item>
128 <layout class="QHBoxLayout" name="horizontalLayout_6"> 88 <layout class="QHBoxLayout" name="horizontalLayout_6">
129 <item> 89 <item>
130 <widget class="QLabel" name="ar_label"> 90 <widget class="QLabel" name="ar_label">
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index 37aadf7f8..be5006ad3 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -12,9 +12,6 @@ ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent)
12 12
13 ui->setupUi(this); 13 ui->setupUi(this);
14 14
15 // TODO: Remove this after assembly shaders are fully integrated
16 ui->use_assembly_shaders->setVisible(false);
17
18 SetConfiguration(); 15 SetConfiguration();
19} 16}
20 17
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index e4eb5594b..a05fa64ba 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -480,7 +480,9 @@ void ConfigureInputPlayer::RestoreDefaults() {
480 SetAnalogButton(params, analogs_param[analog_id], analog_sub_buttons[sub_button_id]); 480 SetAnalogButton(params, analogs_param[analog_id], analog_sub_buttons[sub_button_id]);
481 } 481 }
482 } 482 }
483
483 UpdateButtonLabels(); 484 UpdateButtonLabels();
485 ApplyConfiguration();
484} 486}
485 487
486void ConfigureInputPlayer::ClearAll() { 488void ConfigureInputPlayer::ClearAll() {
@@ -505,6 +507,7 @@ void ConfigureInputPlayer::ClearAll() {
505 } 507 }
506 508
507 UpdateButtonLabels(); 509 UpdateButtonLabels();
510 ApplyConfiguration();
508} 511}
509 512
510void ConfigureInputPlayer::UpdateButtonLabels() { 513void ConfigureInputPlayer::UpdateButtonLabels() {
diff --git a/src/yuzu/configuration/configure_service.cpp b/src/yuzu/configuration/configure_service.cpp
index 06566e981..0de7a4f0b 100644
--- a/src/yuzu/configuration/configure_service.cpp
+++ b/src/yuzu/configuration/configure_service.cpp
@@ -68,6 +68,7 @@ void ConfigureService::SetConfiguration() {
68} 68}
69 69
70std::pair<QString, QString> ConfigureService::BCATDownloadEvents() { 70std::pair<QString, QString> ConfigureService::BCATDownloadEvents() {
71#ifdef YUZU_ENABLE_BOXCAT
71 std::optional<std::string> global; 72 std::optional<std::string> global;
72 std::map<std::string, Service::BCAT::EventStatus> map; 73 std::map<std::string, Service::BCAT::EventStatus> map;
73 const auto res = Service::BCAT::Boxcat::GetStatus(global, map); 74 const auto res = Service::BCAT::Boxcat::GetStatus(global, map);
@@ -105,7 +106,10 @@ std::pair<QString, QString> ConfigureService::BCATDownloadEvents() {
105 .arg(QString::fromStdString(key)) 106 .arg(QString::fromStdString(key))
106 .arg(FormatEventStatusString(value)); 107 .arg(FormatEventStatusString(value));
107 } 108 }
108 return {QStringLiteral("Current Boxcat Events"), std::move(out)}; 109 return {tr("Current Boxcat Events"), std::move(out)};
110#else
111 return {tr("Current Boxcat Events"), tr("There are currently no events on boxcat.")};
112#endif
109} 113}
110 114
111void ConfigureService::OnBCATImplChanged() { 115void ConfigureService::OnBCATImplChanged() {
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 270cccc77..ba69139e5 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -56,6 +56,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
56#include <QShortcut> 56#include <QShortcut>
57#include <QStatusBar> 57#include <QStatusBar>
58#include <QSysInfo> 58#include <QSysInfo>
59#include <QUrl>
59#include <QtConcurrent/QtConcurrent> 60#include <QtConcurrent/QtConcurrent>
60 61
61#include <fmt/format.h> 62#include <fmt/format.h>
@@ -217,7 +218,20 @@ GMainWindow::GMainWindow()
217 LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", yuzu_build_version, Common::g_scm_branch, 218 LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", yuzu_build_version, Common::g_scm_branch,
218 Common::g_scm_desc); 219 Common::g_scm_desc);
219#ifdef ARCHITECTURE_x86_64 220#ifdef ARCHITECTURE_x86_64
220 LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string); 221 const auto& caps = Common::GetCPUCaps();
222 std::string cpu_string = caps.cpu_string;
223 if (caps.avx || caps.avx2 || caps.avx512) {
224 cpu_string += " | AVX";
225 if (caps.avx512) {
226 cpu_string += "512";
227 } else if (caps.avx2) {
228 cpu_string += '2';
229 }
230 if (caps.fma || caps.fma4) {
231 cpu_string += " | FMA";
232 }
233 }
234 LOG_INFO(Frontend, "Host CPU: {}", cpu_string);
221#endif 235#endif
222 LOG_INFO(Frontend, "Host OS: {}", QSysInfo::prettyProductName().toStdString()); 236 LOG_INFO(Frontend, "Host OS: {}", QSysInfo::prettyProductName().toStdString());
223 LOG_INFO(Frontend, "Host RAM: {:.2f} GB", 237 LOG_INFO(Frontend, "Host RAM: {:.2f} GB",
@@ -689,10 +703,7 @@ void GMainWindow::InitializeHotkeys() {
689 Settings::values.use_frame_limit = !Settings::values.use_frame_limit; 703 Settings::values.use_frame_limit = !Settings::values.use_frame_limit;
690 UpdateStatusBar(); 704 UpdateStatusBar();
691 }); 705 });
692 // TODO: Remove this comment/static whenever the next major release of 706 constexpr u16 SPEED_LIMIT_STEP = 5;
693 // MSVC occurs and we make it a requirement (see:
694 // https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html)
695 static constexpr u16 SPEED_LIMIT_STEP = 5;
696 connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Increase Speed Limit"), this), 707 connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Increase Speed Limit"), this),
697 &QShortcut::activated, this, [&] { 708 &QShortcut::activated, this, [&] {
698 if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) { 709 if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) {
@@ -726,6 +737,9 @@ void GMainWindow::InitializeHotkeys() {
726 Settings::values.use_docked_mode); 737 Settings::values.use_docked_mode);
727 dock_status_button->setChecked(Settings::values.use_docked_mode); 738 dock_status_button->setChecked(Settings::values.use_docked_mode);
728 }); 739 });
740 connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Mute Audio"), this),
741 &QShortcut::activated, this,
742 [] { Settings::values.audio_muted = !Settings::values.audio_muted; });
729} 743}
730 744
731void GMainWindow::SetDefaultUIGeometry() { 745void GMainWindow::SetDefaultUIGeometry() {
@@ -826,6 +840,7 @@ void GMainWindow::ConnectMenuEvents() {
826 connect(ui.action_Stop, &QAction::triggered, this, &GMainWindow::OnStopGame); 840 connect(ui.action_Stop, &QAction::triggered, this, &GMainWindow::OnStopGame);
827 connect(ui.action_Report_Compatibility, &QAction::triggered, this, 841 connect(ui.action_Report_Compatibility, &QAction::triggered, this,
828 &GMainWindow::OnMenuReportCompatibility); 842 &GMainWindow::OnMenuReportCompatibility);
843 connect(ui.action_Open_Mods_Page, &QAction::triggered, this, &GMainWindow::OnOpenModsPage);
829 connect(ui.action_Restart, &QAction::triggered, this, [this] { BootGame(QString(game_path)); }); 844 connect(ui.action_Restart, &QAction::triggered, this, [this] { BootGame(QString(game_path)); });
830 connect(ui.action_Configure, &QAction::triggered, this, &GMainWindow::OnConfigure); 845 connect(ui.action_Configure, &QAction::triggered, this, &GMainWindow::OnConfigure);
831 846
@@ -1797,6 +1812,16 @@ void GMainWindow::OnMenuReportCompatibility() {
1797 } 1812 }
1798} 1813}
1799 1814
1815void GMainWindow::OnOpenModsPage() {
1816 const auto mods_page_url = QStringLiteral("https://github.com/yuzu-emu/yuzu/wiki/Switch-Mods");
1817 const QUrl mods_page(mods_page_url);
1818 const bool open = QDesktopServices::openUrl(mods_page);
1819 if (!open) {
1820 QMessageBox::warning(this, tr("Error opening URL"),
1821 tr("Unable to open the URL \"%1\".").arg(mods_page_url));
1822 }
1823}
1824
1800void GMainWindow::ToggleFullscreen() { 1825void GMainWindow::ToggleFullscreen() {
1801 if (!emulation_running) { 1826 if (!emulation_running) {
1802 return; 1827 return;
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 4f4c8ddbe..d55e55cc6 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -181,6 +181,7 @@ private slots:
181 void OnPauseGame(); 181 void OnPauseGame();
182 void OnStopGame(); 182 void OnStopGame();
183 void OnMenuReportCompatibility(); 183 void OnMenuReportCompatibility();
184 void OnOpenModsPage();
184 /// Called whenever a user selects a game in the game list widget. 185 /// Called whenever a user selects a game in the game list widget.
185 void OnGameListLoadFile(QString game_path); 186 void OnGameListLoadFile(QString game_path);
186 void OnGameListOpenFolder(GameListOpenTarget target, const std::string& game_path); 187 void OnGameListOpenFolder(GameListOpenTarget target, const std::string& game_path);
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index 97c90f50b..b5745dfd5 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -113,6 +113,7 @@
113 <string>&amp;Help</string> 113 <string>&amp;Help</string>
114 </property> 114 </property>
115 <addaction name="action_Report_Compatibility"/> 115 <addaction name="action_Report_Compatibility"/>
116 <addaction name="action_Open_Mods_Page"/>
116 <addaction name="separator"/> 117 <addaction name="separator"/>
117 <addaction name="action_About"/> 118 <addaction name="action_About"/>
118 </widget> 119 </widget>
@@ -256,6 +257,11 @@
256 <bool>false</bool> 257 <bool>false</bool>
257 </property> 258 </property>
258 </action> 259 </action>
260 <action name="action_Open_Mods_Page">
261 <property name="text">
262 <string>Open Mods Page</string>
263 </property>
264 </action>
259 <action name="action_Open_yuzu_Folder"> 265 <action name="action_Open_yuzu_Folder">
260 <property name="text"> 266 <property name="text">
261 <string>Open yuzu Folder</string> 267 <string>Open yuzu Folder</string>
diff --git a/src/yuzu/yuzu.rc b/src/yuzu/yuzu.rc
index 1b253653f..4a3645a71 100644
--- a/src/yuzu/yuzu.rc
+++ b/src/yuzu/yuzu.rc
@@ -16,4 +16,4 @@ IDI_ICON1 ICON "../../dist/yuzu.ico"
16// RT_MANIFEST 16// RT_MANIFEST
17// 17//
18 18
191 RT_MANIFEST "../../dist/yuzu.manifest" 190 RT_MANIFEST "../../dist/yuzu.manifest"
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index c20d48c42..659b9f701 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -380,8 +380,6 @@ void Config::ReadValues() {
380 Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "debug", false); 380 Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "debug", false);
381 Settings::values.vulkan_device = sdl2_config->GetInteger("Renderer", "vulkan_device", 0); 381 Settings::values.vulkan_device = sdl2_config->GetInteger("Renderer", "vulkan_device", 0);
382 382
383 Settings::values.resolution_factor =
384 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
385 Settings::values.aspect_ratio = 383 Settings::values.aspect_ratio =
386 static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); 384 static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
387 Settings::values.max_anisotropy = 385 Settings::values.max_anisotropy =
@@ -432,6 +430,8 @@ void Config::ReadValues() {
432 Settings::values.quest_flag = sdl2_config->GetBoolean("Debugging", "quest_flag", false); 430 Settings::values.quest_flag = sdl2_config->GetBoolean("Debugging", "quest_flag", false);
433 Settings::values.disable_cpu_opt = 431 Settings::values.disable_cpu_opt =
434 sdl2_config->GetBoolean("Debugging", "disable_cpu_opt", false); 432 sdl2_config->GetBoolean("Debugging", "disable_cpu_opt", false);
433 Settings::values.disable_macro_jit =
434 sdl2_config->GetBoolean("Debugging", "disable_macro_jit", false);
435 435
436 const auto title_list = sdl2_config->Get("AddOns", "title_ids", ""); 436 const auto title_list = sdl2_config->Get("AddOns", "title_ids", "");
437 std::stringstream ss(title_list); 437 std::stringstream ss(title_list);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index abc6e6e65..45c07ed5d 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -117,11 +117,6 @@ use_hw_renderer =
117# 0: Interpreter (slow), 1 (default): JIT (fast) 117# 0: Interpreter (slow), 1 (default): JIT (fast)
118use_shader_jit = 118use_shader_jit =
119 119
120# Resolution scale factor
121# 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale
122# factor for the Switch resolution
123resolution_factor =
124
125# Aspect ratio 120# Aspect ratio
126# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window 121# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
127aspect_ratio = 122aspect_ratio =
@@ -291,6 +286,8 @@ quest_flag =
291# Determines whether or not JIT CPU optimizations are enabled 286# Determines whether or not JIT CPU optimizations are enabled
292# false: Optimizations Enabled, true: Optimizations Disabled 287# false: Optimizations Enabled, true: Optimizations Disabled
293disable_cpu_opt = 288disable_cpu_opt =
 289# Determines whether or not the macro Just In Time compiler is disabled
 290# false: JIT Enabled (faster), true: JIT Disabled (slower)
 291disable_macro_jit =
294 291
295[WebService] 292[WebService]
296# Whether or not to enable telemetry 293# Whether or not to enable telemetry
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
index 411e7e647..09cc0a3b5 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
@@ -98,6 +98,9 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(Core::System& system, bool fullscreen)
98 SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); 98 SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8);
99 SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0); 99 SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0);
100 SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1); 100 SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1);
101 if (Settings::values.renderer_debug) {
102 SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG);
103 }
101 SDL_GL_SetSwapInterval(0); 104 SDL_GL_SetSwapInterval(0);
102 105
103 std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname, 106 std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname,
diff --git a/src/yuzu_cmd/yuzu.rc b/src/yuzu_cmd/yuzu.rc
index 7de8ef3d9..0cde75e2f 100644
--- a/src/yuzu_cmd/yuzu.rc
+++ b/src/yuzu_cmd/yuzu.rc
@@ -14,4 +14,4 @@ YUZU_ICON ICON "../../dist/yuzu.ico"
14// RT_MANIFEST 14// RT_MANIFEST
15// 15//
16 16
171 RT_MANIFEST "../../dist/yuzu.manifest" 170 RT_MANIFEST "../../dist/yuzu.manifest"
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
index 3be58b15d..1566c2e3f 100644
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -116,8 +116,6 @@ void Config::ReadValues() {
116 Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); 116 Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);
117 117
118 // Renderer 118 // Renderer
119 Settings::values.resolution_factor =
120 static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0));
121 Settings::values.aspect_ratio = 119 Settings::values.aspect_ratio =
122 static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); 120 static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0));
123 Settings::values.max_anisotropy = 121 Settings::values.max_anisotropy =
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h
index ca203b64d..41bbbbf60 100644
--- a/src/yuzu_tester/default_ini.h
+++ b/src/yuzu_tester/default_ini.h
@@ -21,11 +21,6 @@ use_hw_renderer =
21# 0: Interpreter (slow), 1 (default): JIT (fast) 21# 0: Interpreter (slow), 1 (default): JIT (fast)
22use_shader_jit = 22use_shader_jit =
23 23
24# Resolution scale factor
25# 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale
26# factor for the Switch resolution
27resolution_factor =
28
29# Aspect ratio 24# Aspect ratio
30# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window 25# 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window
31aspect_ratio = 26aspect_ratio =
diff --git a/src/yuzu_tester/service/yuzutest.cpp b/src/yuzu_tester/service/yuzutest.cpp
index 85d3f436b..2d3f6e3a7 100644
--- a/src/yuzu_tester/service/yuzutest.cpp
+++ b/src/yuzu_tester/service/yuzutest.cpp
@@ -53,7 +53,7 @@ private:
53 53
54 IPC::ResponseBuilder rb{ctx, 3}; 54 IPC::ResponseBuilder rb{ctx, 3};
55 rb.Push(RESULT_SUCCESS); 55 rb.Push(RESULT_SUCCESS);
56 rb.Push<u32>(write_size); 56 rb.Push<u32>(static_cast<u32>(write_size));
57 } 57 }
58 58
59 void StartIndividual(Kernel::HLERequestContext& ctx) { 59 void StartIndividual(Kernel::HLERequestContext& ctx) {
diff --git a/src/yuzu_tester/yuzu.rc b/src/yuzu_tester/yuzu.rc
index 7de8ef3d9..0cde75e2f 100644
--- a/src/yuzu_tester/yuzu.rc
+++ b/src/yuzu_tester/yuzu.rc
@@ -14,4 +14,4 @@ YUZU_ICON ICON "../../dist/yuzu.ico"
14// RT_MANIFEST 14// RT_MANIFEST
15// 15//
16 16
171 RT_MANIFEST "../../dist/yuzu.manifest" 170 RT_MANIFEST "../../dist/yuzu.manifest"