 .ci/scripts/common/post-upload.sh                          |  15
 .ci/scripts/common/pre-upload.sh                           |   6
 .ci/scripts/format/docker.sh                               |   6
 .ci/scripts/format/exec.sh                                 |   4
 .ci/scripts/format/script.sh                               |  37
 .ci/scripts/linux/docker.sh                                |  14
 .ci/scripts/linux/exec.sh                                  |   5
 .ci/scripts/linux/upload.sh                                |  14
 .ci/scripts/merge/apply-patches-by-label.py                |  28
 .ci/scripts/merge/check-label-presence.py                  |  18
 .ci/scripts/merge/yuzubot-git-config.sh                    |   2
 .ci/scripts/windows/docker.sh                              |  50
 .ci/scripts/windows/exec.sh                                |   5
 .ci/scripts/windows/scan_dll.py                            | 106
 .ci/scripts/windows/upload.sh                              |  13
 .ci/templates/build-single.yml                             |  23
 .ci/templates/build-standard.yml                           |  23
 .ci/templates/build-testing.yml                            |  33
 .ci/templates/format-check.yml                             |  14
 .ci/templates/merge.yml                                    |  46
 .ci/templates/mergebot.yml                                 |  15
 .ci/templates/retrieve-artifact-source.yml                 |  16
 .ci/templates/retrieve-master-source.yml                   |  11
 .ci/templates/sync-source.yml                              |   7
 .ci/yuzu-mainline.yml                                      |  25
 .ci/yuzu-patreon.yml (renamed from azure-pipelines.yml)    |   0
 .ci/yuzu-repo-sync.yml                                     |  19
 .ci/yuzu-verify.yml                                        |  20
 CMakeModules/GenerateSCMRev.cmake                          |   3
 README.md                                                  |   1
 src/audio_core/audio_renderer.cpp                          |   6
 src/audio_core/audio_renderer.h                            |   3
 src/common/CMakeLists.txt                                  |   3
 src/common/alignment.h                                     |  60
 src/core/CMakeLists.txt                                    |   3
 src/core/arm/arm_interface.h                               |   7
 src/core/arm/dynarmic/arm_dynarmic.cpp                     |   9
 src/core/arm/dynarmic/arm_dynarmic.h                       |   3
 src/core/arm/unicorn/arm_unicorn.cpp                       |  18
 src/core/arm/unicorn/arm_unicorn.h                         |   3
 src/core/core.cpp                                          |  12
 src/core/core.h                                            |  14
 src/core/core_cpu.cpp                                      |  19
 src/core/file_sys/program_metadata.cpp                     |   4
 src/core/file_sys/program_metadata.h                       |   4
 src/core/hardware_interrupt_manager.cpp                    |  30
 src/core/hardware_interrupt_manager.h                      |  31
 src/core/hle/kernel/code_set.h                             |   3
 src/core/hle/kernel/physical_memory.h                      |  19
 src/core/hle/kernel/process.cpp                            |  49
 src/core/hle/kernel/process.h                              |  45
 src/core/hle/kernel/shared_memory.cpp                      |   6
 src/core/hle/kernel/shared_memory.h                        |  13
 src/core/hle/kernel/svc.cpp                                | 138
 src/core/hle/kernel/svc_wrap.h                             |   5
 src/core/hle/kernel/transfer_memory.cpp                    |   2
 src/core/hle/kernel/transfer_memory.h                      |   3
 src/core/hle/kernel/vm_manager.cpp                         | 319
 src/core/hle/kernel/vm_manager.h                           |  57
 src/core/hle/service/am/am.cpp                             |  32
 src/core/hle/service/am/am.h                               |   3
 src/core/hle/service/audio/audio.cpp                       |   6
 src/core/hle/service/audio/audio.h                         |   6
 src/core/hle/service/audio/audout_u.cpp                    |  36
 src/core/hle/service/audio/audout_u.h                      |  12
 src/core/hle/service/audio/audren_u.cpp                    | 205
 src/core/hle/service/audio/audren_u.h                      |  24
 src/core/hle/service/friend/friend.cpp                     |  35
 src/core/hle/service/ldr/ldr.cpp                           |  32
 src/core/hle/service/mii/mii.cpp                           |  16
 src/core/hle/service/ns/pl_u.cpp                           |  12
 src/core/hle/service/nvdrv/devices/nvdevice.h              |  13
 src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp        |  11
 src/core/hle/service/nvdrv/devices/nvdisp_disp0.h          |   5
 src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp       |  15
 src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h         |   5
 src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp         | 152
 src/core/hle/service/nvdrv/devices/nvhost_ctrl.h           |  15
 src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp     |   7
 src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h       |   5
 src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp          |  44
 src/core/hle/service/nvdrv/devices/nvhost_gpu.h            |  41
 src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp        |   5
 src/core/hle/service/nvdrv/devices/nvhost_nvdec.h          |   5
 src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp        |   5
 src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h          |   5
 src/core/hle/service/nvdrv/devices/nvhost_vic.cpp          |   5
 src/core/hle/service/nvdrv/devices/nvhost_vic.h            |   5
 src/core/hle/service/nvdrv/devices/nvmap.cpp               |   5
 src/core/hle/service/nvdrv/devices/nvmap.h                 |   5
 src/core/hle/service/nvdrv/interface.cpp                   |  48
 src/core/hle/service/nvdrv/interface.h                     |   4
 src/core/hle/service/nvdrv/nvdata.h                        |  48
 src/core/hle/service/nvdrv/nvdrv.cpp                       |  59
 src/core/hle/service/nvdrv/nvdrv.h                         |  88
 src/core/hle/service/nvflinger/buffer_queue.cpp            |  23
 src/core/hle/service/nvflinger/buffer_queue.h              |  11
 src/core/hle/service/nvflinger/nvflinger.cpp               |  23
 src/core/hle/service/nvflinger/nvflinger.h                 |   4
 src/core/hle/service/pm/pm.cpp                             | 124
 src/core/hle/service/pm/pm.h                               |   6
 src/core/hle/service/service.cpp                           |   6
 src/core/hle/service/vi/vi.cpp                             |  48
 src/core/loader/elf.cpp                                    |   2
 src/core/loader/kip.cpp                                    |   2
 src/core/loader/nro.cpp                                    |   2
 src/core/loader/nso.cpp                                    |   2
 src/core/settings.cpp                                      |   1
 src/core/settings.h                                        |   1
 src/core/telemetry_session.cpp                             |   1
 src/video_core/CMakeLists.txt                              |   8
 src/video_core/buffer_cache/buffer_block.h                 |  76
 src/video_core/buffer_cache/buffer_cache.h                 | 447
 src/video_core/buffer_cache/map_interval.h                 |  89
 src/video_core/dma_pusher.cpp                              |   3
 src/video_core/engines/fermi_2d.cpp                        |   3
 src/video_core/engines/fermi_2d.h                          |   3
 src/video_core/engines/kepler_compute.cpp                  |   9
 src/video_core/engines/kepler_memory.cpp                   |   4
 src/video_core/engines/kepler_memory.h                     |   1
 src/video_core/engines/maxwell_3d.cpp                      | 332
 src/video_core/engines/maxwell_3d.h                        | 137
 src/video_core/engines/maxwell_dma.cpp                     |  52
 src/video_core/engines/maxwell_dma.h                       |   9
 src/video_core/engines/shader_bytecode.h                   |  81
 src/video_core/gpu.cpp                                     |  98
 src/video_core/gpu.h                                       |  64
 src/video_core/gpu_asynch.cpp                              |  14
 src/video_core/gpu_asynch.h                                |   8
 src/video_core/gpu_synch.cpp                               |   7
 src/video_core/gpu_synch.h                                 |   9
 src/video_core/gpu_thread.cpp                              |  35
 src/video_core/gpu_thread.h                                |  35
 src/video_core/macro_interpreter.cpp                       |   4
 src/video_core/memory_manager.cpp                          |  24
 src/video_core/memory_manager.h                            |   8
 src/video_core/morton.cpp                                  | 116
 src/video_core/morton.h                                    |   3
 src/video_core/rasterizer_interface.h                      |   9
 src/video_core/renderer_base.h                             |   3
 src/video_core/renderer_opengl/gl_buffer_cache.cpp         | 124
 src/video_core/renderer_opengl/gl_buffer_cache.h           |  77
 src/video_core/renderer_opengl/gl_device.cpp               |   6
 src/video_core/renderer_opengl/gl_device.h                 |  15
 src/video_core/renderer_opengl/gl_global_cache.cpp         | 102
 src/video_core/renderer_opengl/gl_global_cache.h           |  82
 src/video_core/renderer_opengl/gl_rasterizer.cpp           | 432
 src/video_core/renderer_opengl/gl_rasterizer.h             |  51
 src/video_core/renderer_opengl/gl_sampler_cache.h          |   4
 src/video_core/renderer_opengl/gl_shader_cache.cpp         | 191
 src/video_core/renderer_opengl/gl_shader_cache.h           |  13
 src/video_core/renderer_opengl/gl_shader_decompiler.cpp    | 273
 src/video_core/renderer_opengl/gl_shader_decompiler.h      |  23
 src/video_core/renderer_opengl/gl_shader_disk_cache.cpp    |  12
 src/video_core/renderer_opengl/gl_shader_disk_cache.h      |  33
 src/video_core/renderer_opengl/gl_shader_gen.cpp           |  45
 src/video_core/renderer_opengl/gl_shader_gen.h             |   5
 src/video_core/renderer_opengl/gl_shader_util.cpp          |  24
 src/video_core/renderer_opengl/gl_state.cpp                |  45
 src/video_core/renderer_opengl/gl_state.h                  |  33
 src/video_core/renderer_opengl/gl_texture_cache.cpp        |  14
 src/video_core/renderer_opengl/gl_texture_cache.h          |   2
 src/video_core/renderer_opengl/renderer_opengl.cpp         | 100
 src/video_core/renderer_opengl/renderer_opengl.h           |   5
 src/video_core/renderer_opengl/utils.cpp                   |  48
 src/video_core/renderer_opengl/utils.h                     |  41
 src/video_core/renderer_vulkan/vk_buffer_cache.cpp         |   4
 src/video_core/renderer_vulkan/vk_buffer_cache.h           |   2
 src/video_core/renderer_vulkan/vk_sampler_cache.h          |   7
 src/video_core/renderer_vulkan/vk_scheduler.cpp            |  16
 src/video_core/renderer_vulkan/vk_scheduler.h              |  78
 src/video_core/renderer_vulkan/vk_shader_decompiler.cpp    |  83
 src/video_core/renderer_vulkan/vk_stream_buffer.cpp        |   8
 src/video_core/renderer_vulkan/vk_stream_buffer.h          |   2
 src/video_core/shader/control_flow.cpp                     | 481
 src/video_core/shader/control_flow.h                       |  79
 src/video_core/shader/decode.cpp                           | 178
 src/video_core/shader/decode/arithmetic.cpp                |  13
 src/video_core/shader/decode/arithmetic_half_immediate.cpp |   4
 src/video_core/shader/decode/conversion.cpp                |  44
 src/video_core/shader/decode/ffma.cpp                      |  10
 src/video_core/shader/decode/float_set.cpp                 |   1
 src/video_core/shader/decode/float_set_predicate.cpp       |  10
 src/video_core/shader/decode/half_set_predicate.cpp        |  71
 src/video_core/shader/decode/hfma2.cpp                     |   4
 src/video_core/shader/decode/image.cpp                     |   6
 src/video_core/shader/decode/integer_set.cpp               |   1
 src/video_core/shader/decode/integer_set_predicate.cpp     |   1
 src/video_core/shader/decode/memory.cpp                    |  37
 src/video_core/shader/decode/other.cpp                     |  71
 src/video_core/shader/decode/predicate_set_register.cpp    |   1
 src/video_core/shader/decode/texture.cpp                   |  42
 src/video_core/shader/decode/warp.cpp                      |  55
 src/video_core/shader/decode/xmad.cpp                      |  12
 src/video_core/shader/node.h                               |  44
 src/video_core/shader/node_helper.cpp                      |   2
 src/video_core/shader/shader_ir.cpp                        | 141
 src/video_core/shader/shader_ir.h                          |  59
 src/video_core/shader/track.cpp                            |  37
 src/video_core/surface.cpp                                 |   5
 src/video_core/texture_cache/surface_base.cpp              |  14
 src/video_core/texture_cache/surface_base.h                |  10
 src/video_core/texture_cache/surface_params.cpp            |  13
 src/video_core/texture_cache/surface_params.h              |   1
 src/video_core/texture_cache/texture_cache.h               |  48
 src/video_core/textures/decoders.cpp                       |  14
 src/video_core/textures/decoders.h                         |   3
 src/video_core/textures/texture.h                          |   2
 src/yuzu/CMakeLists.txt                                    |  55
 src/yuzu/configuration/config.cpp                          |   6
 src/yuzu/configuration/configure_debug.cpp                 |   4
 src/yuzu/configuration/configure_gamelist.cpp              |   2
 src/yuzu/configuration/configure_general.cpp               |   2
 src/yuzu/configuration/configure_input.cpp                 |  12
 src/yuzu/configuration/configure_input_player.cpp          |  10
 src/yuzu/configuration/configure_input_simple.cpp          |   4
 src/yuzu/configuration/configure_mouse_advanced.cpp        |   6
 src/yuzu/configuration/configure_per_general.cpp           |   2
 src/yuzu/configuration/configure_profile_manager.cpp       |   8
 src/yuzu/configuration/configure_touchscreen_advanced.cpp  |   2
 src/yuzu/configuration/configure_web.cpp                   |   2
 src/yuzu/debugger/console.cpp                              |   2
 src/yuzu/discord_impl.cpp                                  |   2
 src/yuzu/game_list.cpp                                     |   2
 src/yuzu/game_list_p.h                                     |   2
 src/yuzu/game_list_worker.cpp                              |   2
 src/yuzu/hotkeys.cpp                                       |   2
 src/yuzu/main.cpp                                          |  36
 src/yuzu/main.h                                            |   3
 src/yuzu/uisettings.cpp (renamed from src/yuzu/ui_settings.cpp) | 2
 src/yuzu/uisettings.h (renamed from src/yuzu/ui_settings.h)    | 0
 src/yuzu_cmd/config.cpp                                    |   1
 src/yuzu_cmd/default_ini.h                                 |   4
 src/yuzu_tester/config.cpp                                 |   1
 src/yuzu_tester/default_ini.h                              |   4
 src/yuzu_tester/yuzu.cpp                                   |   3
 236 files changed, 6092 insertions(+), 2036 deletions(-)
diff --git a/.ci/scripts/common/post-upload.sh b/.ci/scripts/common/post-upload.sh
new file mode 100644
index 000000000..bb4e9d328
--- /dev/null
+++ b/.ci/scripts/common/post-upload.sh
@@ -0,0 +1,15 @@
+#!/bin/bash -ex
+
+# Copy documentation
+cp license.txt "$REV_NAME"
+cp README.md "$REV_NAME"
+
+tar $COMPRESSION_FLAGS "$ARCHIVE_NAME" "$REV_NAME"
+
+mv "$REV_NAME" $RELEASE_NAME
+
+7z a "$REV_NAME.7z" $RELEASE_NAME
+
+# move the compiled archive into the artifacts directory to be uploaded by travis releases
+mv "$ARCHIVE_NAME" artifacts/
+mv "$REV_NAME.7z" artifacts/
diff --git a/.ci/scripts/common/pre-upload.sh b/.ci/scripts/common/pre-upload.sh
new file mode 100644
index 000000000..3c2fc79a2
--- /dev/null
+++ b/.ci/scripts/common/pre-upload.sh
@@ -0,0 +1,6 @@
+#!/bin/bash -ex
+
+GITDATE="`git show -s --date=short --format='%ad' | sed 's/-//g'`"
+GITREV="`git show -s --format='%h'`"
+
+mkdir -p artifacts
diff --git a/.ci/scripts/format/docker.sh b/.ci/scripts/format/docker.sh
new file mode 100644
index 000000000..778411e4a
--- /dev/null
+++ b/.ci/scripts/format/docker.sh
@@ -0,0 +1,6 @@
+#!/bin/bash -ex
+
+# Run clang-format
+cd /yuzu
+chmod a+x ./.ci/scripts/format/script.sh
+./.ci/scripts/format/script.sh
diff --git a/.ci/scripts/format/exec.sh b/.ci/scripts/format/exec.sh
new file mode 100644
index 000000000..5d6393b38
--- /dev/null
+++ b/.ci/scripts/format/exec.sh
@@ -0,0 +1,4 @@
+#!/bin/bash -ex
+
+chmod a+x ./.ci/scripts/format/docker.sh
+docker run -v $(pwd):/yuzu yuzuemu/build-environments:linux-clang-format /bin/bash -ex /yuzu/.ci/scripts/format/docker.sh
diff --git a/.ci/scripts/format/script.sh b/.ci/scripts/format/script.sh
new file mode 100644
index 000000000..5ab828d5e
--- /dev/null
+++ b/.ci/scripts/format/script.sh
@@ -0,0 +1,37 @@
+#!/bin/bash -ex
+
+if grep -nrI '\s$' src *.yml *.txt *.md Doxyfile .gitignore .gitmodules .ci* dist/*.desktop \
+        dist/*.svg dist/*.xml; then
+    echo Trailing whitespace found, aborting
+    exit 1
+fi
+
+# The default clang-format binary points at the outdated 3.5 release, so pin 6.0 explicitly
+CLANG_FORMAT=clang-format-6.0
+$CLANG_FORMAT --version
+
+if [ "$TRAVIS_EVENT_TYPE" = "pull_request" ]; then
+    # Get the list of every file modified in this pull request
+    files_to_lint="$(git diff --name-only --diff-filter=ACMRTUXB $TRAVIS_COMMIT_RANGE | grep '^src/[^.]*[.]\(cpp\|h\)$' || true)"
+else
+    # Check everything for branch pushes
+    files_to_lint="$(find src/ -name '*.cpp' -or -name '*.h')"
+fi
+
+# Turn off tracing for this because it's too verbose
+set +x
+
+for f in $files_to_lint; do
+    d=$(diff -u "$f" <($CLANG_FORMAT "$f") || true)
+    if ! [ -z "$d" ]; then
+        echo "!!! $f does not comply with the coding style; here is the fix:"
+        echo "$d"
+        fail=1
+    fi
+done
+
+set -x
+
+if [ "$fail" = 1 ]; then
+    exit 1
+fi
diff --git a/.ci/scripts/linux/docker.sh b/.ci/scripts/linux/docker.sh
new file mode 100644
index 000000000..f538a4081
--- /dev/null
+++ b/.ci/scripts/linux/docker.sh
@@ -0,0 +1,14 @@
+#!/bin/bash -ex
+
+cd /yuzu
+
+ccache -s
+
+mkdir build || true && cd build
+cmake .. -G Ninja -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON
+
+ninja
+
+ccache -s
+
+ctest -VV -C Release
diff --git a/.ci/scripts/linux/exec.sh b/.ci/scripts/linux/exec.sh
new file mode 100644
index 000000000..a5a6c34b9
--- /dev/null
+++ b/.ci/scripts/linux/exec.sh
@@ -0,0 +1,5 @@
+#!/bin/bash -ex
+
+mkdir -p "ccache" || true
+chmod a+x ./.ci/scripts/linux/docker.sh
+docker run -e ENABLE_COMPATIBILITY_REPORTING -e CCACHE_DIR=/yuzu/ccache -v $(pwd):/yuzu yuzuemu/build-environments:linux-fresh /bin/bash /yuzu/.ci/scripts/linux/docker.sh
diff --git a/.ci/scripts/linux/upload.sh b/.ci/scripts/linux/upload.sh
new file mode 100644
index 000000000..0d131d1dd
--- /dev/null
+++ b/.ci/scripts/linux/upload.sh
@@ -0,0 +1,14 @@
+#!/bin/bash -ex
+
+. .ci/scripts/common/pre-upload.sh
+
+REV_NAME="yuzu-linux-${GITDATE}-${GITREV}"
+ARCHIVE_NAME="${REV_NAME}.tar.xz"
+COMPRESSION_FLAGS="-cJvf"
+
+mkdir "$REV_NAME"
+
+cp build/bin/yuzu-cmd "$REV_NAME"
+cp build/bin/yuzu "$REV_NAME"
+
+. .ci/scripts/common/post-upload.sh
diff --git a/.ci/scripts/merge/apply-patches-by-label.py b/.ci/scripts/merge/apply-patches-by-label.py
new file mode 100644
index 000000000..b346001a5
--- /dev/null
+++ b/.ci/scripts/merge/apply-patches-by-label.py
@@ -0,0 +1,28 @@
+# Fetch and squash-merge all open pull requests that carry a specific label
+# Usage: python apply-patches-by-label.py <Label to Match> <Root Path Folder to DL to>
+
+import requests, sys, json, urllib3.request, shutil, subprocess
+
+http = urllib3.PoolManager()
+dl_list = {}
+
+def check_individual(labels):
+    for label in labels:
+        if (label["name"] == sys.argv[1]):
+            return True
+    return False
+
+try:
+    url = 'https://api.github.com/repos/yuzu-emu/yuzu/pulls'
+    response = requests.get(url)
+    if (response.ok):
+        j = json.loads(response.content)
+        for pr in j:
+            if (check_individual(pr["labels"])):
+                pn = pr["number"]
+                print("Matched PR# %s" % pn)
+                print(subprocess.check_output(["git", "fetch", "https://github.com/yuzu-emu/yuzu.git", "pull/%s/head:pr-%s" % (pn, pn), "-f"]))
+                print(subprocess.check_output(["git", "merge", "--squash", "pr-%s" % pn]))
+                print(subprocess.check_output(["git", "commit", "-m\"Merge PR %s\"" % pn]))
+except:
+    sys.exit(-1)
diff --git a/.ci/scripts/merge/check-label-presence.py b/.ci/scripts/merge/check-label-presence.py
new file mode 100644
index 000000000..048466d7e
--- /dev/null
+++ b/.ci/scripts/merge/check-label-presence.py
@@ -0,0 +1,18 @@
+# Checks whether the specified pull request carries the specified label
+# Usage: python check-label-presence.py <Pull Request ID> <Name of Label>
+
+import requests, json, sys
+
+try:
+    url = 'https://api.github.com/repos/yuzu-emu/yuzu/issues/%s' % sys.argv[1]
+    response = requests.get(url)
+    if (response.ok):
+        j = json.loads(response.content)
+        for label in j["labels"]:
+            if label["name"] == sys.argv[2]:
+                print('##vso[task.setvariable variable=enabletesting;]true')
+                sys.exit()
+except:
+    sys.exit(-1)
+
+print('##vso[task.setvariable variable=enabletesting;]false')
diff --git a/.ci/scripts/merge/yuzubot-git-config.sh b/.ci/scripts/merge/yuzubot-git-config.sh
new file mode 100644
index 000000000..d9d595bbc
--- /dev/null
+++ b/.ci/scripts/merge/yuzubot-git-config.sh
@@ -0,0 +1,2 @@
+git config --global user.email "yuzu@yuzu-emu.org"
+git config --global user.name "yuzubot"
\ No newline at end of file
diff --git a/.ci/scripts/windows/docker.sh b/.ci/scripts/windows/docker.sh
new file mode 100644
index 000000000..f7093363b
--- /dev/null
+++ b/.ci/scripts/windows/docker.sh
@@ -0,0 +1,50 @@
+#!/bin/bash -ex
+
+cd /yuzu
+
+ccache -s
+
+# Dirty hack to trick the unicorn makefile into believing we are on a MINGW system
+mv /bin/uname /bin/uname1 && echo -e '#!/bin/sh\necho MINGW64' >> /bin/uname
+chmod +x /bin/uname
+
+# Dirty hack to trick the unicorn makefile into believing we have cmd
+echo '' >> /bin/cmd
+chmod +x /bin/cmd
+
+mkdir build || true && cd build
+cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
+ninja
+
+# Clean up the dirty hacks
+rm /bin/uname && mv /bin/uname1 /bin/uname
+rm /bin/cmd
+
+ccache -s
+
+echo "Tests skipped"
+#ctest -VV -C Release
+
+echo 'Prepare binaries...'
+cd ..
+mkdir package
+
+QT_PLATFORM_DLL_PATH='/usr/x86_64-w64-mingw32/lib/qt5/plugins/platforms/'
+find build/ -name "yuzu*.exe" -exec cp {} 'package' \;
+
+# copy Qt plugins
+mkdir package/platforms
+cp "${QT_PLATFORM_DLL_PATH}/qwindows.dll" package/platforms/
+cp -rv "${QT_PLATFORM_DLL_PATH}/../mediaservice/" package/
+cp -rv "${QT_PLATFORM_DLL_PATH}/../imageformats/" package/
+rm -f package/mediaservice/*d.dll
+
+for i in package/*.exe; do
+    # We would need to process the PDBs here, but cv2pdb does not work in this
+    # environment, so simply strip all debug symbols instead
+    x86_64-w64-mingw32-strip "${i}"
+done
+
+pip3 install pefile
+python3 .ci/scripts/windows/scan_dll.py package/*.exe "package/"
+python3 .ci/scripts/windows/scan_dll.py package/imageformats/*.dll "package/"
diff --git a/.ci/scripts/windows/exec.sh b/.ci/scripts/windows/exec.sh
new file mode 100644
index 000000000..d6a994856
--- /dev/null
+++ b/.ci/scripts/windows/exec.sh
@@ -0,0 +1,5 @@
+#!/bin/bash -ex
+
+mkdir -p "ccache" || true
+chmod a+x ./.ci/scripts/windows/docker.sh
+docker run -e CCACHE_DIR=/yuzu/ccache -v $(pwd):/yuzu yuzuemu/build-environments:linux-mingw /bin/bash -ex /yuzu/.ci/scripts/windows/docker.sh
diff --git a/.ci/scripts/windows/scan_dll.py b/.ci/scripts/windows/scan_dll.py
new file mode 100644
index 000000000..163183f2e
--- /dev/null
+++ b/.ci/scripts/windows/scan_dll.py
@@ -0,0 +1,106 @@
+import pefile
+import sys
+import re
+import os
+import queue
+import shutil
+
+# constant definitions
+KNOWN_SYS_DLLS = ['WINMM.DLL', 'MSVCRT.DLL', 'VERSION.DLL', 'MPR.DLL',
+                  'DWMAPI.DLL', 'UXTHEME.DLL', 'DNSAPI.DLL', 'IPHLPAPI.DLL']
+# below is for Ubuntu 18.04 with the specified PPA enabled; if you are using
+# another distro or different repositories, change the following accordingly
+DLL_PATH = [
+    '/usr/x86_64-w64-mingw32/bin/',
+    '/usr/x86_64-w64-mingw32/lib/',
+    '/usr/lib/gcc/x86_64-w64-mingw32/7.3-posix/'
+]
+
+missing = []
+
+
+def parse_imports(file_name):
+    results = []
+    pe = pefile.PE(file_name, fast_load=True)
+    pe.parse_data_directories()
+
+    for entry in pe.DIRECTORY_ENTRY_IMPORT:
+        current = entry.dll.decode()
+        current_u = current.upper()  # b/c Windows is often case insensitive
+        # here we filter out system dlls
+        # dlls with names like *32.dll are likely to be system dlls
+        if current_u not in KNOWN_SYS_DLLS and not re.match(string=current_u, pattern=r'.*32\.DLL'):
+            results.append(current)
+
+    return results
+
+
+def parse_imports_recursive(file_name, path_list=[]):
+    q = queue.Queue()  # create a FIFO queue
+    # file_name can be a string or a list for convenience
+    if isinstance(file_name, str):
+        q.put(file_name)
+    elif isinstance(file_name, list):
+        for i in file_name:
+            q.put(i)
+    full_list = []
+    while q.qsize():
+        current = q.get_nowait()
+        print('> %s' % current)
+        deps = parse_imports(current)
+        # if this dll does not have any imports, ignore it
+        if not deps:
+            continue
+        for dep in deps:
+            # the dependency is already included in the list, skip
+            if dep in full_list:
+                continue
+            # find the requested dll in the provided paths
+            full_path = find_dll(dep)
+            if not full_path:
+                missing.append(dep)
+                continue
+            full_list.append(dep)
+            q.put(full_path)
+            path_list.append(full_path)
+    return full_list
+
+
+def find_dll(name):
+    for path in DLL_PATH:
+        for root, _, files in os.walk(path):
+            for f in files:
+                if name.lower() == f.lower():
+                    return os.path.join(root, f)
+
+
+def deploy(name, dst, dry_run=False):
+    dlls_path = []
+    parse_imports_recursive(name, dlls_path)
+    for dll_entry in dlls_path:
+        if not dry_run:
+            shutil.copy(dll_entry, dst)
+        else:
+            print('[Dry-Run] Copy %s to %s' % (dll_entry, dst))
+    print('Deploy completed.')
+    return dlls_path
+
+
+def main():
+    if len(sys.argv) < 3:
+        print('Usage: %s [files to examine ...] [target deploy directory]' % sys.argv[0])
+        return 1
+    to_deploy = sys.argv[1:-1]
+    tgt_dir = sys.argv[-1]
+    if not os.path.isdir(tgt_dir):
+        print('%s is not a directory.' % tgt_dir)
+        return 1
+    print('Scanning dependencies...')
+    deploy(to_deploy, tgt_dir)
+    if missing:
+        print('The following DLLs were not found:\n%s' % '\n'.join(missing))
+    return 0
+
+
+if __name__ == '__main__':
+    main()
diff --git a/.ci/scripts/windows/upload.sh b/.ci/scripts/windows/upload.sh
new file mode 100644
index 000000000..de73d3541
--- /dev/null
+++ b/.ci/scripts/windows/upload.sh
@@ -0,0 +1,13 @@
+#!/bin/bash -ex
+
+. .ci/scripts/common/pre-upload.sh
+
+REV_NAME="yuzu-windows-mingw-${GITDATE}-${GITREV}"
+ARCHIVE_NAME="${REV_NAME}.tar.gz"
+COMPRESSION_FLAGS="-czvf"
+
+mkdir "$REV_NAME"
+# get around the permission issues
+cp -r package/* "$REV_NAME"
+
+. .ci/scripts/common/post-upload.sh
diff --git a/.ci/templates/build-single.yml b/.ci/templates/build-single.yml
new file mode 100644
index 000000000..357731eb9
--- /dev/null
+++ b/.ci/templates/build-single.yml
@@ -0,0 +1,23 @@
+parameters:
+  artifactSource: 'true'
+  cache: 'false'
+
+steps:
+- task: DockerInstaller@0
+  displayName: 'Prepare Environment'
+  inputs:
+    dockerVersion: '17.09.0-ce'
+- ${{ if eq(parameters.cache, 'true') }}:
+  - task: CacheBeta@0
+    displayName: 'Cache Build System'
+    inputs:
+      key: yuzu-v1-$(BuildName)-$(BuildSuffix)-$(CacheSuffix)
+      path: $(System.DefaultWorkingDirectory)/ccache
+      cacheHitVar: CACHE_RESTORED
+- script: chmod a+x ./.ci/scripts/$(ScriptFolder)/exec.sh && ./.ci/scripts/$(ScriptFolder)/exec.sh
+  displayName: 'Build'
+- script: chmod a+x ./.ci/scripts/$(ScriptFolder)/upload.sh && RELEASE_NAME=$(BuildName) ./.ci/scripts/$(ScriptFolder)/upload.sh
+  displayName: 'Package Artifacts'
+- publish: artifacts
+  artifact: 'yuzu-$(BuildName)-$(BuildSuffix)'
+  displayName: 'Upload Artifacts'
diff --git a/.ci/templates/build-standard.yml b/.ci/templates/build-standard.yml
new file mode 100644
index 000000000..aa180894e
--- /dev/null
+++ b/.ci/templates/build-standard.yml
@@ -0,0 +1,23 @@
+jobs:
+- job: build
+  displayName: 'standard'
+  pool:
+    vmImage: ubuntu-latest
+  strategy:
+    maxParallel: 10
+    matrix:
+      windows:
+        BuildSuffix: 'windows-mingw'
+        ScriptFolder: 'windows'
+      linux:
+        BuildSuffix: 'linux'
+        ScriptFolder: 'linux'
+  steps:
+  - template: ./sync-source.yml
+    parameters:
+      artifactSource: $(parameters.artifactSource)
+      needSubmodules: 'true'
+  - template: ./build-single.yml
+    parameters:
+      artifactSource: 'false'
+      cache: $(parameters.cache)
\ No newline at end of file
diff --git a/.ci/templates/build-testing.yml b/.ci/templates/build-testing.yml
new file mode 100644
index 000000000..a307addfd
--- /dev/null
+++ b/.ci/templates/build-testing.yml
@@ -0,0 +1,33 @@
+jobs:
+- job: build_test
+  displayName: 'testing'
+  pool:
+    vmImage: ubuntu-latest
+  strategy:
+    maxParallel: 5
+    matrix:
+      windows:
+        BuildSuffix: 'windows-testing'
+        ScriptFolder: 'windows'
+  steps:
+  - script: sudo apt upgrade python3-pip && pip install requests urllib3
+    displayName: 'Prepare Environment'
+  - task: PythonScript@0
+    condition: eq(variables['Build.Reason'], 'PullRequest')
+    displayName: 'Determine Testing Status'
+    inputs:
+      scriptSource: 'filePath'
+      scriptPath: '.ci/scripts/merge/check-label-presence.py'
+      arguments: '$(System.PullRequest.PullRequestNumber) create-testing-build'
+  - ${{ if eq(variables.enabletesting, 'true') }}:
+    - template: ./sync-source.yml
+      parameters:
+        artifactSource: $(parameters.artifactSource)
+        needSubmodules: 'true'
+    - template: ./mergebot.yml
+      parameters:
+        matchLabel: 'testing-merge'
+    - template: ./build-single.yml
+      parameters:
+        artifactSource: 'false'
+        cache: 'false'
diff --git a/.ci/templates/format-check.yml b/.ci/templates/format-check.yml
new file mode 100644
index 000000000..5061f1cb8
--- /dev/null
+++ b/.ci/templates/format-check.yml
@@ -0,0 +1,14 @@
+parameters:
+  artifactSource: 'true'
+
+steps:
+- template: ./sync-source.yml
+  parameters:
+    artifactSource: $(parameters.artifactSource)
+    needSubmodules: 'false'
+- task: DockerInstaller@0
+  displayName: 'Prepare Environment'
+  inputs:
+    dockerVersion: '17.09.0-ce'
+- script: chmod a+x ./.ci/scripts/format/exec.sh && ./.ci/scripts/format/exec.sh
+  displayName: 'Verify Formatting'
diff --git a/.ci/templates/merge.yml b/.ci/templates/merge.yml
new file mode 100644
index 000000000..efc82778a
--- /dev/null
+++ b/.ci/templates/merge.yml
@@ -0,0 +1,46 @@
+jobs:
+- job: merge
+  displayName: 'pull requests'
+  steps:
+  - checkout: self
+    submodules: recursive
+  - template: ./mergebot.yml
+    parameters:
+      matchLabel: '$(BuildName)-merge'
+  - task: ArchiveFiles@2
+    displayName: 'Package Source'
+    inputs:
+      rootFolderOrFile: '$(System.DefaultWorkingDirectory)'
+      includeRootFolder: false
+      archiveType: '7z'
+      archiveFile: '$(Build.ArtifactStagingDirectory)/yuzu-$(BuildName)-source.7z'
+  - task: PublishPipelineArtifact@1
+    displayName: 'Upload Artifacts'
+    inputs:
+      targetPath: '$(Build.ArtifactStagingDirectory)/yuzu-$(BuildName)-source.7z'
+      artifact: 'yuzu-$(BuildName)-source'
+      replaceExistingArchive: true
+- job: upload_source
+  displayName: 'upload'
+  dependsOn: merge
+  steps:
+  - template: ./sync-source.yml
+    parameters:
+      artifactSource: 'true'
+      needSubmodules: 'true'
+  - script: chmod a+x $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh && $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh
+    displayName: 'Apply Git Configuration'
+  - script: git tag -a $(BuildName)-$(Build.BuildId) -m "yuzu $(BuildName) $(Build.BuildNumber) $(Build.DefinitionName)"
+    displayName: 'Tag Source'
+  - script: git remote add other $(GitRepoPushChangesURL)
+    displayName: 'Register Repository'
+  - script: git push --follow-tags --force other HEAD:$(GitPushBranch)
+    displayName: 'Update Code'
+  - script: git rev-list -n 1 $(BuildName)-$(Build.BuildId) > $(Build.ArtifactStagingDirectory)/tag-commit.sha
+    displayName: 'Calculate Release Point'
+  - task: PublishPipelineArtifact@1
+    displayName: 'Upload Release Point'
+    inputs:
+      targetPath: '$(Build.ArtifactStagingDirectory)/tag-commit.sha'
+      artifact: 'yuzu-$(BuildName)-release-point'
+      replaceExistingArchive: true
\ No newline at end of file
diff --git a/.ci/templates/mergebot.yml b/.ci/templates/mergebot.yml
new file mode 100644
index 000000000..5211efcc6
--- /dev/null
+++ b/.ci/templates/mergebot.yml
@@ -0,0 +1,15 @@
+parameters:
+  matchLabel: 'dummy-merge'
+
+steps:
+  - script: mkdir $(System.DefaultWorkingDirectory)/patches && pip install requests urllib3
+    displayName: 'Prepare Environment'
+  - script: chmod a+x $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh && $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh
+    displayName: 'Apply Git Configuration'
+  - task: PythonScript@0
+    displayName: 'Discover, Download, and Apply Patches'
+    inputs:
+      scriptSource: 'filePath'
+      scriptPath: '.ci/scripts/merge/apply-patches-by-label.py'
+      arguments: '${{ parameters.matchLabel }} patches'
+      workingDirectory: '$(System.DefaultWorkingDirectory)'
diff --git a/.ci/templates/retrieve-artifact-source.yml b/.ci/templates/retrieve-artifact-source.yml
new file mode 100644
index 000000000..47d217e7b
--- /dev/null
+++ b/.ci/templates/retrieve-artifact-source.yml
@@ -0,0 +1,16 @@
+steps:
+- checkout: none
+- task: DownloadPipelineArtifact@2
+  displayName: 'Download Source'
+  inputs:
+    artifactName: 'yuzu-$(BuildName)-source'
+    buildType: 'current'
+    targetPath: '$(Build.ArtifactStagingDirectory)'
+- script: rm -rf $(System.DefaultWorkingDirectory) && mkdir $(System.DefaultWorkingDirectory)
+  displayName: 'Clean Working Directory'
+- task: ExtractFiles@1
+  displayName: 'Prepare Source'
+  inputs:
+    archiveFilePatterns: '$(Build.ArtifactStagingDirectory)/*.7z'
+    destinationFolder: '$(System.DefaultWorkingDirectory)'
+    cleanDestinationFolder: false
\ No newline at end of file
diff --git a/.ci/templates/retrieve-master-source.yml b/.ci/templates/retrieve-master-source.yml
new file mode 100644
index 000000000..a08a3f926
--- /dev/null
+++ b/.ci/templates/retrieve-master-source.yml
@@ -0,0 +1,11 @@
+parameters:
+  needSubmodules: 'true'
+
+steps:
+- checkout: self
+  displayName: 'Checkout Recursive'
+  submodules: recursive
+#  condition: eq(parameters.needSubmodules, 'true')
+#- checkout: self
+#  displayName: 'Checkout Fast'
+#  condition: ne(parameters.needSubmodules, 'true')
diff --git a/.ci/templates/sync-source.yml b/.ci/templates/sync-source.yml
new file mode 100644
index 000000000..409e1cd83
--- /dev/null
+++ b/.ci/templates/sync-source.yml
@@ -0,0 +1,7 @@
+steps:
+- ${{ if eq(parameters.artifactSource, 'true') }}:
+  - template: ./retrieve-artifact-source.yml
+- ${{ if ne(parameters.artifactSource, 'true') }}:
+  - template: ./retrieve-master-source.yml
+    parameters:
+      needSubmodules: $(parameters.needSubmodules)
\ No newline at end of file
diff --git a/.ci/yuzu-mainline.yml b/.ci/yuzu-mainline.yml
new file mode 100644
index 000000000..2930a8564
--- /dev/null
+++ b/.ci/yuzu-mainline.yml
@@ -0,0 +1,25 @@
+trigger:
+- master
+
+stages:
+- stage: merge
+  displayName: 'merge'
+  jobs:
+  - template: ./templates/merge.yml
+- stage: format
+  dependsOn: merge
+  displayName: 'format'
+  jobs:
+  - job: format
+    displayName: 'clang'
+    pool:
+      vmImage: ubuntu-latest
+    steps:
+    - template: ./templates/format-check.yml
+- stage: build
+  displayName: 'build'
+  dependsOn: format
+  jobs:
+  - template: ./templates/build-standard.yml
+    parameters:
+      cache: 'true'
diff --git a/azure-pipelines.yml b/.ci/yuzu-patreon.yml
index aa912913d..aa912913d 100644
--- a/azure-pipelines.yml
+++ b/.ci/yuzu-patreon.yml
diff --git a/.ci/yuzu-repo-sync.yml b/.ci/yuzu-repo-sync.yml
new file mode 100644
index 000000000..602e298a6
--- /dev/null
+++ b/.ci/yuzu-repo-sync.yml
@@ -0,0 +1,19 @@
+trigger:
+- master
+
+jobs:
+- job: copy
+  displayName: 'Sync Repository'
+  pool:
+    vmImage: 'ubuntu-latest'
+  steps:
+  - script: echo 'https://$(GitUsername):$(GitAccessToken)@dev.azure.com' > $HOME/.git-credentials
+    displayName: 'Load Credentials'
+  - script: git config --global credential.helper store
+    displayName: 'Register Credential Helper'
+  - script: git remote add other $(GitRepoPushChangesURL)
+    displayName: 'Register Repository'
+  - script: git push --force other HEAD:$(GitPushBranch)
+    displayName: 'Update Code'
+  - script: rm -rf $HOME/.git-credentials
+    displayName: 'Clear Cached Credentials'
diff --git a/.ci/yuzu-verify.yml b/.ci/yuzu-verify.yml
new file mode 100644
index 000000000..5492e696a
--- /dev/null
+++ b/.ci/yuzu-verify.yml
@@ -0,0 +1,20 @@
+stages:
+- stage: format
+  displayName: 'format'
+  jobs:
+  - job: format
+    displayName: 'clang'
+    pool:
+      vmImage: ubuntu-latest
+    steps:
+    - template: ./templates/format-check.yml
+      parameters:
+        artifactSource: 'false'
+- stage: build
+  displayName: 'build'
+  dependsOn: format
+  jobs:
+  - template: ./templates/build-standard.yml
+    parameters:
+      cache: 'false'
+  - template: ./templates/build-testing.yml
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index dd65cfe42..a1ace89cb 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -81,7 +81,10 @@ set(HASH_FILES
81 "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" 81 "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
82 "${VIDEO_CORE}/shader/decode/shift.cpp" 82 "${VIDEO_CORE}/shader/decode/shift.cpp"
83 "${VIDEO_CORE}/shader/decode/video.cpp" 83 "${VIDEO_CORE}/shader/decode/video.cpp"
84 "${VIDEO_CORE}/shader/decode/warp.cpp"
84 "${VIDEO_CORE}/shader/decode/xmad.cpp" 85 "${VIDEO_CORE}/shader/decode/xmad.cpp"
86 "${VIDEO_CORE}/shader/control_flow.cpp"
87 "${VIDEO_CORE}/shader/control_flow.h"
85 "${VIDEO_CORE}/shader/decode.cpp" 88 "${VIDEO_CORE}/shader/decode.cpp"
86 "${VIDEO_CORE}/shader/node.h" 89 "${VIDEO_CORE}/shader/node.h"
87 "${VIDEO_CORE}/shader/node_helper.cpp" 90 "${VIDEO_CORE}/shader/node_helper.cpp"
diff --git a/README.md b/README.md
index 4b1ea7d7c..430c6dd65 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,7 @@ yuzu emulator
 =============
 [![Travis CI Build Status](https://travis-ci.org/yuzu-emu/yuzu.svg?branch=master)](https://travis-ci.org/yuzu-emu/yuzu)
 [![AppVeyor CI Build Status](https://ci.appveyor.com/api/projects/status/77k97svb2usreu68?svg=true)](https://ci.appveyor.com/project/bunnei/yuzu)
+[![Azure Mainline CI Build Status](https://dev.azure.com/yuzu-emu/yuzu/_apis/build/status/yuzu%20mainline?branchName=master)](https://dev.azure.com/yuzu-emu/yuzu/)
 
 yuzu is an experimental open-source emulator for the Nintendo Switch from the creators of [Citra](https://citra-emu.org/).
 
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 4882a6cd8..da50a0bbc 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -73,13 +73,15 @@ private:
     EffectInStatus info{};
 };
 AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
-                             Kernel::SharedPtr<Kernel::WritableEvent> buffer_event)
+                             Kernel::SharedPtr<Kernel::WritableEvent> buffer_event,
+                             std::size_t instance_number)
     : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count),
       effects(params.effect_count) {
 
     audio_out = std::make_unique<AudioCore::AudioOut>();
     stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS,
-                                   "AudioRenderer", [=]() { buffer_event->Signal(); });
+                                   fmt::format("AudioRenderer-Instance{}", instance_number),
+                                   [=]() { buffer_event->Signal(); });
    audio_out->StartStream(stream);
 
     QueueMixedBuffer(0);
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index b2e5d336c..45afbe759 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -215,7 +215,8 @@ static_assert(sizeof(UpdateDataHeader) == 0x40, "UpdateDataHeader has wrong size
 class AudioRenderer {
 public:
     AudioRenderer(Core::Timing::CoreTiming& core_timing, AudioRendererParameter params,
-                  Kernel::SharedPtr<Kernel::WritableEvent> buffer_event);
+                  Kernel::SharedPtr<Kernel::WritableEvent> buffer_event,
+                  std::size_t instance_number);
     ~AudioRenderer();
 
     std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params);
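
(Aside, not part of the patch: each stream opened by an AudioRenderer previously used the fixed name "AudioRenderer"; with the new instance_number argument every renderer instance names its stream distinctly. A minimal sketch of the naming scheme, assuming fmt is available as elsewhere in the codebase:)

    #include <fmt/format.h>
    #include <string>

    // Hypothetical helper: instance 1 yields "AudioRenderer-Instance1".
    std::string StreamName(std::size_t instance_number) {
        return fmt::format("AudioRenderer-Instance{}", instance_number);
    }
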
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 2554add28..01abdb3bb 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -55,7 +55,10 @@ add_custom_command(OUTPUT scm_rev.cpp
55 "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp" 55 "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
56 "${VIDEO_CORE}/shader/decode/shift.cpp" 56 "${VIDEO_CORE}/shader/decode/shift.cpp"
57 "${VIDEO_CORE}/shader/decode/video.cpp" 57 "${VIDEO_CORE}/shader/decode/video.cpp"
58 "${VIDEO_CORE}/shader/decode/warp.cpp"
58 "${VIDEO_CORE}/shader/decode/xmad.cpp" 59 "${VIDEO_CORE}/shader/decode/xmad.cpp"
60 "${VIDEO_CORE}/shader/control_flow.cpp"
61 "${VIDEO_CORE}/shader/control_flow.h"
59 "${VIDEO_CORE}/shader/decode.cpp" 62 "${VIDEO_CORE}/shader/decode.cpp"
60 "${VIDEO_CORE}/shader/node.h" 63 "${VIDEO_CORE}/shader/node.h"
61 "${VIDEO_CORE}/shader/node_helper.cpp" 64 "${VIDEO_CORE}/shader/node_helper.cpp"
diff --git a/src/common/alignment.h b/src/common/alignment.h
index 617b14d9b..88d5d3a65 100644
--- a/src/common/alignment.h
+++ b/src/common/alignment.h
@@ -3,6 +3,7 @@
 #pragma once
 
 #include <cstddef>
+#include <memory>
 #include <type_traits>
 
 namespace Common {
@@ -37,4 +38,63 @@ constexpr bool IsWordAligned(T value) {
     return (value & 0b11) == 0;
 }
 
+template <typename T, std::size_t Align = 16>
+class AlignmentAllocator {
+public:
+    using value_type = T;
+    using size_type = std::size_t;
+    using difference_type = std::ptrdiff_t;
+
+    using pointer = T*;
+    using const_pointer = const T*;
+
+    using reference = T&;
+    using const_reference = const T&;
+
+public:
+    pointer address(reference r) noexcept {
+        return std::addressof(r);
+    }
+
+    const_pointer address(const_reference r) const noexcept {
+        return std::addressof(r);
+    }
+
+    pointer allocate(size_type n) {
+        return static_cast<pointer>(::operator new (n * sizeof(T), std::align_val_t{Align}));
+    }
+
+    void deallocate(pointer p, size_type) {
+        ::operator delete (p, std::align_val_t{Align});
+    }
+
+    void construct(pointer p, const value_type& wert) {
+        new (p) value_type(wert);
+    }
+
+    void destroy(pointer p) {
+        p->~value_type();
+    }
+
+    size_type max_size() const noexcept {
+        return size_type(-1) / sizeof(value_type);
+    }
+
+    template <typename T2>
+    struct rebind {
+        using other = AlignmentAllocator<T2, Align>;
+    };
+
+    bool operator!=(const AlignmentAllocator<T, Align>& other) const noexcept {
+        return !(*this == other);
+    }
+
+    // Returns true if and only if storage allocated from *this
+    // can be deallocated from other, and vice versa.
+    // Always returns true for stateless allocators.
+    bool operator==(const AlignmentAllocator<T, Align>& other) const noexcept {
+        return true;
+    }
+};
+
 } // namespace Common
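
(Aside, not part of the patch: allocate() is corrected above to request n * sizeof(T) bytes, since a standard allocator's allocate takes an element count while operator new takes bytes. AlignmentAllocator is a standard-conforming stateless allocator, so it drops straight into any container. A minimal usage sketch, assuming yuzu's u8 typedef from common_types.h; the 256-byte alignment is an arbitrary illustration value:)

    #include <vector>
    #include "common/alignment.h"
    #include "common/common_types.h"

    // A byte buffer whose backing storage is aligned to a 256-byte boundary;
    // buffer.data() is then suitable for alignment-sensitive host-memory use.
    using AlignedBuffer = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
    AlignedBuffer buffer(0x1000);
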
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index f4325f0f8..5462decee 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -111,6 +111,8 @@ add_library(core STATIC
     frontend/scope_acquire_window_context.h
     gdbstub/gdbstub.cpp
     gdbstub/gdbstub.h
+    hardware_interrupt_manager.cpp
+    hardware_interrupt_manager.h
     hle/ipc.h
     hle/ipc_helpers.h
     hle/kernel/address_arbiter.cpp
@@ -372,6 +374,7 @@ add_library(core STATIC
     hle/service/nvdrv/devices/nvmap.h
     hle/service/nvdrv/interface.cpp
     hle/service/nvdrv/interface.h
+    hle/service/nvdrv/nvdata.h
     hle/service/nvdrv/nvdrv.cpp
     hle/service/nvdrv/nvdrv.h
     hle/service/nvdrv/nvmemp.cpp
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index c6691a8e1..45e94e625 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -44,13 +44,6 @@ public:
     /// Step CPU by one instruction
     virtual void Step() = 0;
 
-    /// Maps a backing memory region for the CPU
-    virtual void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
-                                  Kernel::VMAPermission perms) = 0;
-
-    /// Unmaps a region of memory that was previously mapped using MapBackingMemory
-    virtual void UnmapMemory(VAddr address, std::size_t size) = 0;
-
     /// Clear all instruction cache
     virtual void ClearInstructionCache() = 0;
 
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 44307fa19..f1506b372 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -177,15 +177,6 @@ ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor,
 
 ARM_Dynarmic::~ARM_Dynarmic() = default;
 
-void ARM_Dynarmic::MapBackingMemory(u64 address, std::size_t size, u8* memory,
-                                    Kernel::VMAPermission perms) {
-    inner_unicorn.MapBackingMemory(address, size, memory, perms);
-}
-
-void ARM_Dynarmic::UnmapMemory(u64 address, std::size_t size) {
-    inner_unicorn.UnmapMemory(address, size);
-}
-
 void ARM_Dynarmic::SetPC(u64 pc) {
     jit->SetPC(pc);
 }
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index b701e97a3..504d46c68 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -23,9 +23,6 @@ public:
     ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor, std::size_t core_index);
     ~ARM_Dynarmic() override;
 
-    void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
-                          Kernel::VMAPermission perms) override;
-    void UnmapMemory(u64 address, std::size_t size) override;
     void SetPC(u64 pc) override;
     u64 GetPC() const override;
     u64 GetReg(int index) const override;
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index 4e07fe8b5..97d5c2a8a 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -50,11 +50,14 @@ static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_
 
 static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value,
                                void* user_data) {
+    auto* const system = static_cast<System*>(user_data);
+
     ARM_Interface::ThreadContext ctx{};
-    Core::CurrentArmInterface().SaveContext(ctx);
+    system->CurrentArmInterface().SaveContext(ctx);
     ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr,
                ctx.pc, ctx.cpu_registers[30]);
-    return {};
+
+    return false;
 }
 
 ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {
@@ -65,7 +68,7 @@ ARM_Unicorn::ARM_Unicorn(System& system) : system{system} {
 
     uc_hook hook{};
     CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1));
-    CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, this, 0, -1));
+    CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, &system, 0, -1));
     if (GDBStub::IsServerEnabled()) {
         CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1));
         last_bkpt_hit = false;
@@ -76,15 +79,6 @@ ARM_Unicorn::~ARM_Unicorn() {
     CHECKED(uc_close(uc));
 }
 
-void ARM_Unicorn::MapBackingMemory(VAddr address, std::size_t size, u8* memory,
-                                   Kernel::VMAPermission perms) {
-    CHECKED(uc_mem_map_ptr(uc, address, size, static_cast<u32>(perms), memory));
-}
-
-void ARM_Unicorn::UnmapMemory(VAddr address, std::size_t size) {
-    CHECKED(uc_mem_unmap(uc, address, size));
-}
-
 void ARM_Unicorn::SetPC(u64 pc) {
     CHECKED(uc_reg_write(uc, UC_ARM64_REG_PC, &pc));
 }
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index 34e974b4d..fe2ffd70c 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -18,9 +18,6 @@ public:
     explicit ARM_Unicorn(System& system);
     ~ARM_Unicorn() override;
 
-    void MapBackingMemory(VAddr address, std::size_t size, u8* memory,
-                          Kernel::VMAPermission perms) override;
-    void UnmapMemory(VAddr address, std::size_t size) override;
     void SetPC(u64 pc) override;
     u64 GetPC() const override;
     u64 GetReg(int index) const override;
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 4aceee785..20d64f3b0 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -19,6 +19,7 @@
 #include "core/file_sys/vfs_concat.h"
 #include "core/file_sys/vfs_real.h"
 #include "core/gdbstub/gdbstub.h"
+#include "core/hardware_interrupt_manager.h"
 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
@@ -151,7 +152,7 @@ struct System::Impl {
         if (!renderer->Init()) {
             return ResultStatus::ErrorVideoCore;
         }
-
+        interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system);
         gpu_core = VideoCore::CreateGPU(system);
 
         is_powered_on = true;
@@ -298,6 +299,7 @@ struct System::Impl {
     std::unique_ptr<VideoCore::RendererBase> renderer;
     std::unique_ptr<Tegra::GPU> gpu_core;
     std::shared_ptr<Tegra::DebugContext> debug_context;
+    std::unique_ptr<Core::Hardware::InterruptManager> interrupt_manager;
     CpuCoreManager cpu_core_manager;
     bool is_powered_on = false;
 
@@ -444,6 +446,14 @@ const Tegra::GPU& System::GPU() const {
     return *impl->gpu_core;
 }
 
+Core::Hardware::InterruptManager& System::InterruptManager() {
+    return *impl->interrupt_manager;
+}
+
+const Core::Hardware::InterruptManager& System::InterruptManager() const {
+    return *impl->interrupt_manager;
+}
+
 VideoCore::RendererBase& System::Renderer() {
     return *impl->renderer;
 }
diff --git a/src/core/core.h b/src/core/core.h
index 11e73278e..0138d93b0 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -70,6 +70,10 @@ namespace Core::Timing {
 class CoreTiming;
 }
 
+namespace Core::Hardware {
+class InterruptManager;
+}
+
 namespace Core {
 
 class ARM_Interface;
@@ -234,6 +238,12 @@ public:
     /// Provides a constant reference to the core timing instance.
     const Timing::CoreTiming& CoreTiming() const;
 
+    /// Provides a reference to the interrupt manager instance.
+    Core::Hardware::InterruptManager& InterruptManager();
+
+    /// Provides a constant reference to the interrupt manager instance.
+    const Core::Hardware::InterruptManager& InterruptManager() const;
+
     /// Provides a reference to the kernel instance.
     Kernel::KernelCore& Kernel();
 
@@ -327,10 +337,6 @@ private:
     static System s_instance;
 };
 
-inline ARM_Interface& CurrentArmInterface() {
-    return System::GetInstance().CurrentArmInterface();
-}
-
 inline Kernel::Process* CurrentProcess() {
     return System::GetInstance().CurrentProcess();
 }
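
(Aside, not part of the patch: the new accessor pair mirrors the existing GPU()/Renderer() accessors on System. A hedged sketch of a call site; syncpoint_id and value stand in for real arguments:)

    // Hypothetical: raise a GPU syncpoint interrupt through the new accessor.
    system.InterruptManager().GPUInterruptSyncpt(syncpoint_id, value);
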
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index 99b7d387d..21c410e34 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -53,16 +53,12 @@ bool CpuBarrier::Rendezvous() {
 Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
          std::size_t core_index)
     : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
-    if (Settings::values.cpu_jit_enabled) {
 #ifdef ARCHITECTURE_x86_64
-        arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index);
+    arm_interface = std::make_unique<ARM_Dynarmic>(system, exclusive_monitor, core_index);
 #else
-        arm_interface = std::make_unique<ARM_Unicorn>(system);
-        LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
+    arm_interface = std::make_unique<ARM_Unicorn>(system);
+    LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
 #endif
-    } else {
-        arm_interface = std::make_unique<ARM_Unicorn>(system);
-    }
 
     scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
 }
@@ -70,15 +66,12 @@ Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_ba
 Cpu::~Cpu() = default;
 
 std::unique_ptr<ExclusiveMonitor> Cpu::MakeExclusiveMonitor(std::size_t num_cores) {
-    if (Settings::values.cpu_jit_enabled) {
 #ifdef ARCHITECTURE_x86_64
-        return std::make_unique<DynarmicExclusiveMonitor>(num_cores);
+    return std::make_unique<DynarmicExclusiveMonitor>(num_cores);
 #else
-        return nullptr; // TODO(merry): Passthrough exclusive monitor
+    // TODO(merry): Passthrough exclusive monitor
+    return nullptr;
 #endif
-    } else {
-        return nullptr; // TODO(merry): Passthrough exclusive monitor
-    }
 }
 
 void Cpu::RunLoop(bool tight_loop) {
diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp
index eb76174c5..7310b3602 100644
--- a/src/core/file_sys/program_metadata.cpp
+++ b/src/core/file_sys/program_metadata.cpp
@@ -94,6 +94,10 @@ u64 ProgramMetadata::GetFilesystemPermissions() const {
     return aci_file_access.permissions;
 }
 
+u32 ProgramMetadata::GetSystemResourceSize() const {
+    return npdm_header.system_resource_size;
+}
+
 const ProgramMetadata::KernelCapabilityDescriptors& ProgramMetadata::GetKernelCapabilities() const {
     return aci_kernel_capabilities;
 }
diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h
index 43bf2820a..88ec97d85 100644
--- a/src/core/file_sys/program_metadata.h
+++ b/src/core/file_sys/program_metadata.h
@@ -58,6 +58,7 @@ public:
     u32 GetMainThreadStackSize() const;
     u64 GetTitleID() const;
     u64 GetFilesystemPermissions() const;
+    u32 GetSystemResourceSize() const;
     const KernelCapabilityDescriptors& GetKernelCapabilities() const;
 
     void Print() const;
@@ -76,7 +77,8 @@ private:
     u8 reserved_3;
     u8 main_thread_priority;
     u8 main_thread_cpu;
-    std::array<u8, 8> reserved_4;
+    std::array<u8, 4> reserved_4;
+    u32_le system_resource_size;
     u32_le process_category;
     u32_le main_stack_size;
     std::array<u8, 0x10> application_name;
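
The layout change above is deliberate: reserved_4 shrinks from 8 bytes to 4 so the new 4-byte system_resource_size field is carved out of previously reserved space, leaving the overall NPDM header size and the offsets of every following field unchanged. A standalone sketch of that invariant, using hypothetical struct names rather than the emulator's:

```cpp
#include <array>
#include <cstdint>

// Hypothetical before/after layouts demonstrating that carving a u32 out of
// reserved bytes keeps the struct size (and trailing field offsets) stable.
struct HeaderBefore {
    std::array<std::uint8_t, 8> reserved;
    std::uint32_t process_category;
};

struct HeaderAfter {
    std::array<std::uint8_t, 4> reserved;
    std::uint32_t system_resource_size; // newly exposed field
    std::uint32_t process_category;
};

static_assert(sizeof(HeaderBefore) == sizeof(HeaderAfter),
              "carving a field out of reserved bytes must not change the size");
```
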
diff --git a/src/core/hardware_interrupt_manager.cpp b/src/core/hardware_interrupt_manager.cpp
new file mode 100644
index 000000000..c2115db2d
--- /dev/null
+++ b/src/core/hardware_interrupt_manager.cpp
@@ -0,0 +1,30 @@
+// Copyright 2019 Yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/core.h"
+#include "core/core_timing.h"
+#include "core/hardware_interrupt_manager.h"
+#include "core/hle/service/nvdrv/interface.h"
+#include "core/hle/service/sm/sm.h"
+
+namespace Core::Hardware {
+
+InterruptManager::InterruptManager(Core::System& system_in) : system(system_in) {
+    gpu_interrupt_event =
+        system.CoreTiming().RegisterEvent("GPUInterrupt", [this](u64 message, s64) {
+            auto nvdrv = system.ServiceManager().GetService<Service::Nvidia::NVDRV>("nvdrv");
+            const u32 syncpt = static_cast<u32>(message >> 32);
+            const u32 value = static_cast<u32>(message);
+            nvdrv->SignalGPUInterruptSyncpt(syncpt, value);
+        });
+}
+
+InterruptManager::~InterruptManager() = default;
+
+void InterruptManager::GPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) {
+    const u64 msg = (static_cast<u64>(syncpoint_id) << 32ULL) | value;
+    system.CoreTiming().ScheduleEvent(10, gpu_interrupt_event, msg);
+}
+
+} // namespace Core::Hardware
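
The new interrupt manager packs both interrupt parameters into the single u64 userdata slot that the core timing event provides: the syncpoint id goes in the high 32 bits, the value in the low 32 bits, and the event callback splits them back apart. A minimal self-contained sketch of that round trip (names here are illustrative):

```cpp
#include <cstdint>
#include <utility>

// Syncpoint id in the high half, value in the low half of the 64-bit message.
constexpr std::uint64_t PackSyncptMessage(std::uint32_t syncpoint_id, std::uint32_t value) {
    return (static_cast<std::uint64_t>(syncpoint_id) << 32) | value;
}

constexpr std::pair<std::uint32_t, std::uint32_t> UnpackSyncptMessage(std::uint64_t message) {
    return {static_cast<std::uint32_t>(message >> 32), static_cast<std::uint32_t>(message)};
}

static_assert(UnpackSyncptMessage(PackSyncptMessage(3, 0xCAFE)).first == 3);
static_assert(UnpackSyncptMessage(PackSyncptMessage(3, 0xCAFE)).second == 0xCAFE);
```
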
diff --git a/src/core/hardware_interrupt_manager.h b/src/core/hardware_interrupt_manager.h
new file mode 100644
index 000000000..494db883a
--- /dev/null
+++ b/src/core/hardware_interrupt_manager.h
@@ -0,0 +1,31 @@
+// Copyright 2019 Yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Core {
+class System;
+}
+
+namespace Core::Timing {
+struct EventType;
+}
+
+namespace Core::Hardware {
+
+class InterruptManager {
+public:
+    explicit InterruptManager(Core::System& system);
+    ~InterruptManager();
+
+    void GPUInterruptSyncpt(u32 syncpoint_id, u32 value);
+
+private:
+    Core::System& system;
+    Core::Timing::EventType* gpu_interrupt_event{};
+};
+
+} // namespace Core::Hardware
diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h
index 879957dcb..d8ad54030 100644
--- a/src/core/hle/kernel/code_set.h
+++ b/src/core/hle/kernel/code_set.h
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include "common/common_types.h"
+#include "core/hle/kernel/physical_memory.h"
 
 namespace Kernel {
 
@@ -77,7 +78,7 @@ struct CodeSet final {
     }
 
     /// The overall data that backs this code set.
-    std::vector<u8> memory;
+    Kernel::PhysicalMemory memory;
 
     /// The segments that comprise this code set.
     std::array<Segment, 3> segments;
diff --git a/src/core/hle/kernel/physical_memory.h b/src/core/hle/kernel/physical_memory.h
new file mode 100644
index 000000000..090565310
--- /dev/null
+++ b/src/core/hle/kernel/physical_memory.h
@@ -0,0 +1,19 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/alignment.h"
+
+namespace Kernel {
+
+// This encapsulation serves 2 purposes:
+// - First, to encapsulate host physical memory under a single type and set an
+//   standard for managing it.
+// - Second to ensure all host backing memory used is aligned to 256 bytes due
+//   to strict alignment restrictions on GPU memory.
+
+using PhysicalMemory = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
+
+} // namespace Kernel
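
What this buys in practice: PhysicalMemory behaves exactly like std::vector<u8>, but its data() pointer is guaranteed 256-byte aligned, which satisfies the GPU's mapping restrictions. The allocator below is a simplified stand-in for Common::AlignmentAllocator written against plain C++17 aligned operator new; the real implementation lives in src/common/alignment.h and may differ in detail:

```cpp
#include <cstddef>
#include <cstdint>
#include <new>
#include <vector>

// Minimal alignment-guaranteeing allocator in the spirit of
// Common::AlignmentAllocator (sketch, not the emulator's actual code).
template <typename T, std::size_t Align>
struct AlignedAllocator {
    using value_type = T;

    AlignedAllocator() noexcept = default;
    template <typename U>
    AlignedAllocator(const AlignedAllocator<U, Align>&) noexcept {}

    T* allocate(std::size_t n) {
        return static_cast<T*>(::operator new(n * sizeof(T), std::align_val_t{Align}));
    }
    void deallocate(T* p, std::size_t n) {
        ::operator delete(p, n * sizeof(T), std::align_val_t{Align});
    }
};

template <typename T, typename U, std::size_t A>
bool operator==(const AlignedAllocator<T, A>&, const AlignedAllocator<U, A>&) {
    return true;
}
template <typename T, typename U, std::size_t A>
bool operator!=(const AlignedAllocator<T, A>&, const AlignedAllocator<U, A>&) {
    return false;
}

int main() {
    std::vector<std::uint8_t, AlignedAllocator<std::uint8_t, 256>> block(0x1000);
    // The backing storage is 256-byte aligned, as GPU mappings require.
    return reinterpret_cast<std::uintptr_t>(block.data()) % 256 == 0 ? 0 : 1;
}
```
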
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index f45ef05f6..e80a12ac3 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -129,20 +129,17 @@ u64 Process::GetTotalPhysicalMemoryAvailable() const {
     return vm_manager.GetTotalPhysicalMemoryAvailable();
 }
 
-u64 Process::GetTotalPhysicalMemoryAvailableWithoutMmHeap() const {
-    // TODO: Subtract the personal heap size from this when the
-    // personal heap is implemented.
-    return GetTotalPhysicalMemoryAvailable();
+u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const {
+    return GetTotalPhysicalMemoryAvailable() - GetSystemResourceSize();
 }
 
 u64 Process::GetTotalPhysicalMemoryUsed() const {
-    return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size;
+    return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size +
+           GetSystemResourceUsage();
 }
 
-u64 Process::GetTotalPhysicalMemoryUsedWithoutMmHeap() const {
-    // TODO: Subtract the personal heap size from this when the
-    // personal heap is implemented.
-    return GetTotalPhysicalMemoryUsed();
+u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const {
+    return GetTotalPhysicalMemoryUsed() - GetSystemResourceUsage();
 }
 
 void Process::RegisterThread(const Thread* thread) {
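
The two *WithoutSystemResource queries are now plain subtractions, and because GetSystemResourceUsage() is stubbed to return zero (see the process.h hunk below), GetTotalPhysicalMemoryUsed() is unchanged in practice for now. A worked example with made-up values, not taken from a real title:

```cpp
#include <cstdint>

constexpr std::uint64_t total_available = 0xF8000000;       // VM manager stub value
constexpr std::uint64_t system_resource_size = 0x00200000;  // requested in the NPDM
constexpr std::uint64_t heap_stack_code = 0x01000000;       // heap + stack + code
constexpr std::uint64_t system_resource_usage = 0;          // GetSystemResourceUsage() stub

static_assert(total_available - system_resource_size == 0xF7E00000);  // AvailableWithout...
static_assert(heap_stack_code + system_resource_usage == 0x01000000); // Used (unchanged)
```
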
@@ -172,6 +169,7 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
     program_id = metadata.GetTitleID();
     ideal_core = metadata.GetMainThreadCore();
     is_64bit_process = metadata.Is64BitProgram();
+    system_resource_size = metadata.GetSystemResourceSize();
 
     vm_manager.Reset(metadata.GetAddressSpaceType());
 
@@ -186,19 +184,11 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
 }
 
 void Process::Run(s32 main_thread_priority, u64 stack_size) {
-    // The kernel always ensures that the given stack size is page aligned.
-    main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
-
-    // Allocate and map the main thread stack
-    // TODO(bunnei): This is heap area that should be allocated by the kernel and not mapped as part
-    // of the user address space.
-    const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
-    vm_manager
-        .MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
-                        0, main_thread_stack_size, MemoryState::Stack)
-        .Unwrap();
+    AllocateMainThreadStack(stack_size);
+    tls_region_address = CreateTLSRegion();
 
     vm_manager.LogLayout();
+
     ChangeStatus(ProcessStatus::Running);
 
     SetupMainThread(*this, kernel, main_thread_priority);
@@ -228,6 +218,9 @@ void Process::PrepareForTermination() {
     stop_threads(system.Scheduler(2).GetThreadList());
     stop_threads(system.Scheduler(3).GetThreadList());
 
+    FreeTLSRegion(tls_region_address);
+    tls_region_address = 0;
+
     ChangeStatus(ProcessStatus::Exited);
 }
 
@@ -254,7 +247,7 @@ VAddr Process::CreateTLSRegion() {
     ASSERT(region_address.Succeeded());
 
     const auto map_result = vm_manager.MapMemoryBlock(
-        *region_address, std::make_shared<std::vector<u8>>(Memory::PAGE_SIZE), 0,
+        *region_address, std::make_shared<PhysicalMemory>(Memory::PAGE_SIZE), 0,
         Memory::PAGE_SIZE, MemoryState::ThreadLocal);
     ASSERT(map_result.Succeeded());
 
@@ -284,7 +277,7 @@ void Process::FreeTLSRegion(VAddr tls_address) {
 }
 
 void Process::LoadModule(CodeSet module_, VAddr base_addr) {
-    const auto memory = std::make_shared<std::vector<u8>>(std::move(module_.memory));
+    const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory));
 
     const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
                                 MemoryState memory_state) {
@@ -327,4 +320,16 @@ void Process::ChangeStatus(ProcessStatus new_status) {
     WakeupAllWaitingThreads();
 }
 
+void Process::AllocateMainThreadStack(u64 stack_size) {
+    // The kernel always ensures that the given stack size is page aligned.
+    main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
+
+    // Allocate and map the main thread stack
+    const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
+    vm_manager
+        .MapMemoryBlock(mapping_address, std::make_shared<PhysicalMemory>(main_thread_stack_size),
+                        0, main_thread_stack_size, MemoryState::Stack)
+        .Unwrap();
+}
+
 } // namespace Kernel
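
The extracted AllocateMainThreadStack helper keeps the old placement rule: align the requested size up to the 0x1000-byte page size, then map the stack so it ends exactly at the TLS/IO region end. A compile-time sketch of that arithmetic, with an assumed region-end address for illustration:

```cpp
#include <cstdint>

constexpr std::uint64_t PageAlignUp(std::uint64_t value) {
    constexpr std::uint64_t kPageSize = 0x1000;
    return (value + kPageSize - 1) & ~(kPageSize - 1);
}

constexpr std::uint64_t tls_io_region_end = 0x8000000000;  // assumed example value
constexpr std::uint64_t requested_stack = 0x12345;         // not page aligned
constexpr std::uint64_t stack_size = PageAlignUp(requested_stack);
constexpr std::uint64_t stack_base = tls_io_region_end - stack_size;

static_assert(stack_size == 0x13000);                      // rounded up to a page
static_assert(stack_base + stack_size == tls_io_region_end);
```
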
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 83ea02bee..c2df451f3 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -135,6 +135,11 @@ public:
         return mutex;
     }
 
+    /// Gets the address to the process' dedicated TLS region.
+    VAddr GetTLSRegionAddress() const {
+        return tls_region_address;
+    }
+
     /// Gets the current status of the process
     ProcessStatus GetStatus() const {
         return status;
@@ -168,8 +173,24 @@ public:
         return capabilities.GetPriorityMask();
     }
 
-    u32 IsVirtualMemoryEnabled() const {
-        return is_virtual_address_memory_enabled;
+    /// Gets the amount of secure memory to allocate for memory management.
+    u32 GetSystemResourceSize() const {
+        return system_resource_size;
+    }
+
+    /// Gets the amount of secure memory currently in use for memory management.
+    u32 GetSystemResourceUsage() const {
+        // On hardware, this returns the amount of system resource memory that has
+        // been used by the kernel. This is problematic for Yuzu to emulate, because
+        // system resource memory is used for page tables -- and yuzu doesn't really
+        // have a way to calculate how much memory is required for page tables for
+        // the current process at any given time.
+        // TODO: Is this even worth implementing? Games may retrieve this value via
+        // an SDK function that gets used + available system resource size for debug
+        // or diagnostic purposes. However, it seems unlikely that a game would make
+        // decisions based on how much system memory is dedicated to its page tables.
+        // Is returning a value other than zero wise?
+        return 0;
     }
 
     /// Whether this process is an AArch64 or AArch32 process.
@@ -196,15 +217,15 @@ public:
     u64 GetTotalPhysicalMemoryAvailable() const;
 
     /// Retrieves the total physical memory available to this process in bytes,
-    /// without the size of the personal heap added to it.
-    u64 GetTotalPhysicalMemoryAvailableWithoutMmHeap() const;
+    /// without the size of the personal system resource heap added to it.
+    u64 GetTotalPhysicalMemoryAvailableWithoutSystemResource() const;
 
     /// Retrieves the total physical memory used by this process in bytes.
     u64 GetTotalPhysicalMemoryUsed() const;
 
     /// Retrieves the total physical memory used by this process in bytes,
-    /// without the size of the personal heap added to it.
-    u64 GetTotalPhysicalMemoryUsedWithoutMmHeap() const;
+    /// without the size of the personal system resource heap added to it.
+    u64 GetTotalPhysicalMemoryUsedWithoutSystemResource() const;
 
     /// Gets the list of all threads created with this process as their owner.
     const std::list<const Thread*>& GetThreadList() const {
@@ -280,6 +301,9 @@ private:
     /// a process signal.
     void ChangeStatus(ProcessStatus new_status);
 
+    /// Allocates the main thread stack for the process, given the stack size in bytes.
+    void AllocateMainThreadStack(u64 stack_size);
+
     /// Memory manager for this process.
     Kernel::VMManager vm_manager;
 
@@ -298,12 +322,16 @@ private:
     /// Title ID corresponding to the process
     u64 program_id = 0;
 
+    /// Specifies additional memory to be reserved for the process's memory management by the
+    /// system. When this is non-zero, secure memory is allocated and used for page table allocation
+    /// instead of using the normal global page tables/memory block management.
+    u32 system_resource_size = 0;
+
     /// Resource limit descriptor for this process
     SharedPtr<ResourceLimit> resource_limit;
 
     /// The ideal CPU core for this process, threads are scheduled on this core by default.
     u8 ideal_core = 0;
-    u32 is_virtual_address_memory_enabled = 0;
 
     /// The Thread Local Storage area is allocated as processes create threads,
     /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part
@@ -338,6 +366,9 @@ private:
     /// variable related facilities.
     Mutex mutex;
 
+    /// Address indicating the location of the process' dedicated TLS region.
+    VAddr tls_region_address = 0;
+
     /// Random values for svcGetInfo RandomEntropy
     std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy{};
 
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index f15c5ee36..a815c4eea 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -28,7 +28,7 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
     shared_memory->other_permissions = other_permissions;
 
     if (address == 0) {
-        shared_memory->backing_block = std::make_shared<std::vector<u8>>(size);
+        shared_memory->backing_block = std::make_shared<Kernel::PhysicalMemory>(size);
         shared_memory->backing_block_offset = 0;
 
         // Refresh the address mappings for the current process.
@@ -59,8 +59,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
 }
 
 SharedPtr<SharedMemory> SharedMemory::CreateForApplet(
-    KernelCore& kernel, std::shared_ptr<std::vector<u8>> heap_block, std::size_t offset, u64 size,
-    MemoryPermission permissions, MemoryPermission other_permissions, std::string name) {
+    KernelCore& kernel, std::shared_ptr<Kernel::PhysicalMemory> heap_block, std::size_t offset,
+    u64 size, MemoryPermission permissions, MemoryPermission other_permissions, std::string name) {
     SharedPtr<SharedMemory> shared_memory(new SharedMemory(kernel));
 
     shared_memory->owner_process = nullptr;
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h
index c2b6155e1..01ca6dcd2 100644
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -10,6 +10,7 @@
 
 #include "common/common_types.h"
 #include "core/hle/kernel/object.h"
+#include "core/hle/kernel/physical_memory.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/result.h"
 
@@ -62,12 +63,10 @@ public:
      * block.
      * @param name Optional object name, used for debugging purposes.
      */
-    static SharedPtr<SharedMemory> CreateForApplet(KernelCore& kernel,
-                                                   std::shared_ptr<std::vector<u8>> heap_block,
-                                                   std::size_t offset, u64 size,
-                                                   MemoryPermission permissions,
-                                                   MemoryPermission other_permissions,
-                                                   std::string name = "Unknown Applet");
+    static SharedPtr<SharedMemory> CreateForApplet(
+        KernelCore& kernel, std::shared_ptr<Kernel::PhysicalMemory> heap_block, std::size_t offset,
+        u64 size, MemoryPermission permissions, MemoryPermission other_permissions,
+        std::string name = "Unknown Applet");
 
     std::string GetTypeName() const override {
         return "SharedMemory";
@@ -135,7 +134,7 @@ private:
     ~SharedMemory() override;
 
     /// Backing memory for this shared memory block.
-    std::shared_ptr<std::vector<u8>> backing_block;
+    std::shared_ptr<PhysicalMemory> backing_block;
     /// Offset into the backing block for this shared memory.
     std::size_t backing_block_offset = 0;
     /// Size of the memory block. Page-aligned.
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 332573a95..1fd1a732a 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -318,7 +318,14 @@ static ResultCode UnmapMemory(Core::System& system, VAddr dst_addr, VAddr src_ad
         return result;
     }
 
-    return vm_manager.UnmapRange(dst_addr, size);
+    const auto unmap_res = vm_manager.UnmapRange(dst_addr, size);
+
+    // Reprotect the source mapping on success
+    if (unmap_res.IsSuccess()) {
+        ASSERT(vm_manager.ReprotectRange(src_addr, size, VMAPermission::ReadWrite).IsSuccess());
+    }
+
+    return unmap_res;
 }
 
 /// Connect to an OS service given the port name, returns the handle to the port to out
@@ -729,16 +736,16 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
         StackRegionBaseAddr = 14,
         StackRegionSize = 15,
         // 3.0.0+
-        IsVirtualAddressMemoryEnabled = 16,
-        PersonalMmHeapUsage = 17,
+        SystemResourceSize = 16,
+        SystemResourceUsage = 17,
         TitleId = 18,
         // 4.0.0+
         PrivilegedProcessId = 19,
         // 5.0.0+
         UserExceptionContextAddr = 20,
         // 6.0.0+
-        TotalPhysicalMemoryAvailableWithoutMmHeap = 21,
-        TotalPhysicalMemoryUsedWithoutMmHeap = 22,
+        TotalPhysicalMemoryAvailableWithoutSystemResource = 21,
+        TotalPhysicalMemoryUsedWithoutSystemResource = 22,
     };
 
     const auto info_id_type = static_cast<GetInfoType>(info_id);
@@ -756,12 +763,12 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
     case GetInfoType::StackRegionSize:
     case GetInfoType::TotalPhysicalMemoryAvailable:
     case GetInfoType::TotalPhysicalMemoryUsed:
-    case GetInfoType::IsVirtualAddressMemoryEnabled:
-    case GetInfoType::PersonalMmHeapUsage:
+    case GetInfoType::SystemResourceSize:
+    case GetInfoType::SystemResourceUsage:
     case GetInfoType::TitleId:
     case GetInfoType::UserExceptionContextAddr:
-    case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap:
-    case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap: {
+    case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
+    case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource: {
         if (info_sub_id != 0) {
             return ERR_INVALID_ENUM_VALUE;
         }
@@ -822,8 +829,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
         *result = process->GetTotalPhysicalMemoryUsed();
         return RESULT_SUCCESS;
 
-    case GetInfoType::IsVirtualAddressMemoryEnabled:
-        *result = process->IsVirtualMemoryEnabled();
+    case GetInfoType::SystemResourceSize:
+        *result = process->GetSystemResourceSize();
+        return RESULT_SUCCESS;
+
+    case GetInfoType::SystemResourceUsage:
+        LOG_WARNING(Kernel_SVC, "(STUBBED) Attempted to query system resource usage");
+        *result = process->GetSystemResourceUsage();
         return RESULT_SUCCESS;
 
     case GetInfoType::TitleId:
@@ -831,17 +843,15 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
         return RESULT_SUCCESS;
 
     case GetInfoType::UserExceptionContextAddr:
-        LOG_WARNING(Kernel_SVC,
-                    "(STUBBED) Attempted to query user exception context address, returned 0");
-        *result = 0;
+        *result = process->GetTLSRegionAddress();
         return RESULT_SUCCESS;
 
-    case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap:
-        *result = process->GetTotalPhysicalMemoryAvailable();
+    case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
+        *result = process->GetTotalPhysicalMemoryAvailableWithoutSystemResource();
         return RESULT_SUCCESS;
 
-    case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap:
-        *result = process->GetTotalPhysicalMemoryUsedWithoutMmHeap();
+    case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource:
+        *result = process->GetTotalPhysicalMemoryUsedWithoutSystemResource();
         return RESULT_SUCCESS;
 
     default:
@@ -946,6 +956,86 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
     }
 }
 
+/// Maps memory at a desired address
+static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
+    LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
+
+    if (!Common::Is4KBAligned(addr)) {
+        LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (!Common::Is4KBAligned(size)) {
+        LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
+        return ERR_INVALID_SIZE;
+    }
+
+    if (size == 0) {
+        LOG_ERROR(Kernel_SVC, "Size is zero");
+        return ERR_INVALID_SIZE;
+    }
+
+    if (!(addr < addr + size)) {
+        LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    Process* const current_process = system.Kernel().CurrentProcess();
+    auto& vm_manager = current_process->VMManager();
+
+    if (current_process->GetSystemResourceSize() == 0) {
+        LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
+        return ERR_INVALID_STATE;
+    }
+
+    if (!vm_manager.IsWithinMapRegion(addr, size)) {
+        LOG_ERROR(Kernel_SVC, "Range not within map region");
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return vm_manager.MapPhysicalMemory(addr, size);
+}
+
+/// Unmaps memory previously mapped via MapPhysicalMemory
+static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
+    LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
+
+    if (!Common::Is4KBAligned(addr)) {
+        LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (!Common::Is4KBAligned(size)) {
+        LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
+        return ERR_INVALID_SIZE;
+    }
+
+    if (size == 0) {
+        LOG_ERROR(Kernel_SVC, "Size is zero");
+        return ERR_INVALID_SIZE;
+    }
+
+    if (!(addr < addr + size)) {
+        LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    Process* const current_process = system.Kernel().CurrentProcess();
+    auto& vm_manager = current_process->VMManager();
+
+    if (current_process->GetSystemResourceSize() == 0) {
+        LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
+        return ERR_INVALID_STATE;
+    }
+
+    if (!vm_manager.IsWithinMapRegion(addr, size)) {
+        LOG_ERROR(Kernel_SVC, "Range not within map region");
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return vm_manager.UnmapPhysicalMemory(addr, size);
+}
+
 /// Sets the thread activity
 static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) {
     LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity);
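
Both new SVCs perform the same argument validation, in the same order, before handing off to the VM manager. A condensed, self-contained sketch of those shared checks (the enum below is a stand-in for the kernel's actual ResultCode values):

```cpp
#include <cstdint>

enum class SvcError { Success, InvalidAddress, InvalidSize, InvalidMemoryRange, InvalidState };

constexpr bool Is4KBAligned(std::uint64_t value) {
    return (value & 0xFFF) == 0;
}

constexpr SvcError ValidatePhysicalMemoryArgs(std::uint64_t addr, std::uint64_t size,
                                              std::uint32_t system_resource_size) {
    if (!Is4KBAligned(addr)) {
        return SvcError::InvalidAddress;
    }
    if (size == 0 || !Is4KBAligned(size)) {
        return SvcError::InvalidSize;
    }
    if (!(addr < addr + size)) {  // rejects 64-bit wraparound
        return SvcError::InvalidMemoryRange;
    }
    if (system_resource_size == 0) {  // only processes with a system resource may call
        return SvcError::InvalidState;
    }
    return SvcError::Success;
}

static_assert(ValidatePhysicalMemoryArgs(0x10000, 0x2000, 0x200000) == SvcError::Success);
static_assert(ValidatePhysicalMemoryArgs(0x10001, 0x2000, 0x200000) == SvcError::InvalidAddress);
static_assert(ValidatePhysicalMemoryArgs(0xFFFFFFFFFFFFF000ULL, 0x2000, 0x200000) ==
              SvcError::InvalidMemoryRange);
```
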
@@ -1647,8 +1737,8 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var
 // Wait for an address (via Address Arbiter)
 static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, s32 value,
                                  s64 timeout) {
-    LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}",
-                address, type, value, timeout);
+    LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}", address,
+              type, value, timeout);
 
     // If the passed address is a kernel virtual address, return invalid memory state.
     if (Memory::IsKernelVirtualAddress(address)) {
@@ -1670,8 +1760,8 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type,
 // Signals to an address (via Address Arbiter)
 static ResultCode SignalToAddress(Core::System& system, VAddr address, u32 type, s32 value,
                                   s32 num_to_wake) {
-    LOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
-                address, type, value, num_to_wake);
+    LOG_TRACE(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}",
+              address, type, value, num_to_wake);
 
     // If the passed address is a kernel virtual address, return invalid memory state.
     if (Memory::IsKernelVirtualAddress(address)) {
@@ -2303,8 +2393,8 @@ static const FunctionDef SVC_Table[] = {
     {0x29, SvcWrap<GetInfo>, "GetInfo"},
     {0x2A, nullptr, "FlushEntireDataCache"},
     {0x2B, nullptr, "FlushDataCache"},
-    {0x2C, nullptr, "MapPhysicalMemory"},
-    {0x2D, nullptr, "UnmapPhysicalMemory"},
+    {0x2C, SvcWrap<MapPhysicalMemory>, "MapPhysicalMemory"},
+    {0x2D, SvcWrap<UnmapPhysicalMemory>, "UnmapPhysicalMemory"},
     {0x2E, nullptr, "GetFutureThreadInfo"},
     {0x2F, nullptr, "GetLastThreadInfo"},
     {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"},
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 865473c6f..c2d8d0dc3 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -32,6 +32,11 @@ void SvcWrap(Core::System& system) {
     FuncReturn(system, func(system, Param(system, 0)).raw);
 }
 
+template <ResultCode func(Core::System&, u64, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw);
+}
+
 template <ResultCode func(Core::System&, u32)>
 void SvcWrap(Core::System& system) {
     FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw);
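
For context: SVC_Table stores uniform void(Core::System&) thunks, and SvcWrap<func> adapts each typed handler by pulling its arguments out of guest registers. The new overload covers the (u64, u64) shape used by MapPhysicalMemory and UnmapPhysicalMemory. A toy standalone analogue of the dispatch idea, where FakeSystem and Param are stand-ins for the emulator's register accessors:

```cpp
#include <cstdint>
#include <iostream>

struct FakeSystem {
    std::uint64_t regs[8]{};  // pretend guest registers
};

std::uint64_t Param(const FakeSystem& system, int index) {
    return system.regs[index];
}

std::uint32_t AddBoth(FakeSystem&, std::uint64_t a, std::uint64_t b) {
    return static_cast<std::uint32_t>(a + b);
}

// Adapts a typed two-argument handler to the table's uniform signature.
template <std::uint32_t func(FakeSystem&, std::uint64_t, std::uint64_t)>
void Wrap(FakeSystem& system) {
    system.regs[0] = func(system, Param(system, 0), Param(system, 1));
}

int main() {
    using Thunk = void (*)(FakeSystem&);
    const Thunk table[] = {&Wrap<AddBoth>};  // every entry shares one signature
    FakeSystem system;
    system.regs[0] = 2;
    system.regs[1] = 3;
    table[0](system);
    std::cout << system.regs[0] << '\n';  // prints 5
    return 0;
}
```
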
diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp
index 26c4e5e67..1113c815e 100644
--- a/src/core/hle/kernel/transfer_memory.cpp
+++ b/src/core/hle/kernel/transfer_memory.cpp
@@ -47,7 +47,7 @@ ResultCode TransferMemory::MapMemory(VAddr address, u64 size, MemoryPermission p
         return ERR_INVALID_STATE;
     }
 
-    backing_block = std::make_shared<std::vector<u8>>(size);
+    backing_block = std::make_shared<PhysicalMemory>(size);
 
     const auto map_state = owner_permissions == MemoryPermission::None
                                ? MemoryState::TransferMemoryIsolated
diff --git a/src/core/hle/kernel/transfer_memory.h b/src/core/hle/kernel/transfer_memory.h
index a140b1e2b..6be9dc094 100644
--- a/src/core/hle/kernel/transfer_memory.h
+++ b/src/core/hle/kernel/transfer_memory.h
@@ -8,6 +8,7 @@
 #include <vector>
 
 #include "core/hle/kernel/object.h"
+#include "core/hle/kernel/physical_memory.h"
 
 union ResultCode;
 
@@ -82,7 +83,7 @@ private:
     ~TransferMemory() override;
 
     /// Memory block backing this instance.
-    std::shared_ptr<std::vector<u8>> backing_block;
+    std::shared_ptr<PhysicalMemory> backing_block;
 
     /// The base address for the memory managed by this instance.
     VAddr base_address = 0;
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 501544090..40cea1e7c 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -5,13 +5,15 @@
 #include <algorithm>
 #include <iterator>
 #include <utility>
+#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "common/memory_hook.h"
-#include "core/arm/arm_interface.h"
 #include "core/core.h"
 #include "core/file_sys/program_metadata.h"
 #include "core/hle/kernel/errors.h"
+#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/resource_limit.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/memory.h"
 #include "core/memory_setup.h"
@@ -49,10 +51,14 @@ bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
         type != next.type) {
         return false;
     }
-    if (type == VMAType::AllocatedMemoryBlock &&
-        (backing_block != next.backing_block || offset + size != next.offset)) {
+    if ((attribute & MemoryAttribute::DeviceMapped) == MemoryAttribute::DeviceMapped) {
+        // TODO: Can device mapped memory be merged sanely?
+        // Not merging it may cause inaccuracies versus hardware when memory layout is queried.
         return false;
     }
+    if (type == VMAType::AllocatedMemoryBlock) {
+        return true;
+    }
     if (type == VMAType::BackingMemory && backing_memory + size != next.backing_memory) {
         return false;
     }
@@ -98,9 +104,9 @@ bool VMManager::IsValidHandle(VMAHandle handle) const {
 }
 
 ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
-                                                          std::shared_ptr<std::vector<u8>> block,
+                                                          std::shared_ptr<PhysicalMemory> block,
                                                           std::size_t offset, u64 size,
-                                                          MemoryState state) {
+                                                          MemoryState state, VMAPermission perm) {
     ASSERT(block != nullptr);
     ASSERT(offset + size <= block->size());
 
@@ -109,17 +115,8 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
     VirtualMemoryArea& final_vma = vma_handle->second;
     ASSERT(final_vma.size == size);
 
-    system.ArmInterface(0).MapBackingMemory(target, size, block->data() + offset,
-                                            VMAPermission::ReadWriteExecute);
-    system.ArmInterface(1).MapBackingMemory(target, size, block->data() + offset,
-                                            VMAPermission::ReadWriteExecute);
-    system.ArmInterface(2).MapBackingMemory(target, size, block->data() + offset,
-                                            VMAPermission::ReadWriteExecute);
-    system.ArmInterface(3).MapBackingMemory(target, size, block->data() + offset,
-                                            VMAPermission::ReadWriteExecute);
-
     final_vma.type = VMAType::AllocatedMemoryBlock;
-    final_vma.permissions = VMAPermission::ReadWrite;
+    final_vma.permissions = perm;
     final_vma.state = state;
     final_vma.backing_block = std::move(block);
     final_vma.offset = offset;
@@ -137,11 +134,6 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me
     VirtualMemoryArea& final_vma = vma_handle->second;
     ASSERT(final_vma.size == size);
 
-    system.ArmInterface(0).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
-    system.ArmInterface(1).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
-    system.ArmInterface(2).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
-    system.ArmInterface(3).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
-
     final_vma.type = VMAType::BackingMemory;
     final_vma.permissions = VMAPermission::ReadWrite;
     final_vma.state = state;
@@ -230,11 +222,6 @@ ResultCode VMManager::UnmapRange(VAddr target, u64 size) {
 
     ASSERT(FindVMA(target)->second.size >= size);
 
-    system.ArmInterface(0).UnmapMemory(target, size);
-    system.ArmInterface(1).UnmapMemory(target, size);
-    system.ArmInterface(2).UnmapMemory(target, size);
-    system.ArmInterface(3).UnmapMemory(target, size);
-
     return RESULT_SUCCESS;
 }
 
@@ -274,7 +261,7 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
 
     if (heap_memory == nullptr) {
         // Initialize heap
-        heap_memory = std::make_shared<std::vector<u8>>(size);
+        heap_memory = std::make_shared<PhysicalMemory>(size);
         heap_end = heap_region_base + size;
     } else {
         UnmapRange(heap_region_base, GetCurrentHeapSize());
@@ -308,6 +295,166 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
     return MakeResult<VAddr>(heap_region_base);
 }
 
+ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) {
+    const auto end_addr = target + size;
+    const auto last_addr = end_addr - 1;
+    VAddr cur_addr = target;
+
+    ResultCode result = RESULT_SUCCESS;
+
+    // Check how much memory we've already mapped.
+    const auto mapped_size_result = SizeOfAllocatedVMAsInRange(target, size);
+    if (mapped_size_result.Failed()) {
+        return mapped_size_result.Code();
+    }
+
+    // If we've already mapped the desired amount, return early.
+    const std::size_t mapped_size = *mapped_size_result;
+    if (mapped_size == size) {
+        return RESULT_SUCCESS;
+    }
+
+    // Check that we can map the memory we want.
+    const auto res_limit = system.CurrentProcess()->GetResourceLimit();
+    const u64 physmem_remaining = res_limit->GetMaxResourceValue(ResourceType::PhysicalMemory) -
+                                  res_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory);
+    if (physmem_remaining < (size - mapped_size)) {
+        return ERR_RESOURCE_LIMIT_EXCEEDED;
+    }
+
+    // Keep track of the memory regions we unmap.
+    std::vector<std::pair<u64, u64>> mapped_regions;
+
+    // Iterate, trying to map memory.
+    {
+        cur_addr = target;
+
+        auto iter = FindVMA(target);
+        ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end");
+
+        while (true) {
+            const auto& vma = iter->second;
+            const auto vma_start = vma.base;
+            const auto vma_end = vma_start + vma.size;
+            const auto vma_last = vma_end - 1;
+
+            // Map the memory block
+            const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr);
+            if (vma.state == MemoryState::Unmapped) {
+                const auto map_res =
+                    MapMemoryBlock(cur_addr, std::make_shared<PhysicalMemory>(map_size, 0), 0,
+                                   map_size, MemoryState::Heap, VMAPermission::ReadWrite);
+                result = map_res.Code();
+                if (result.IsError()) {
+                    break;
+                }
+
+                mapped_regions.emplace_back(cur_addr, map_size);
+            }
+
+            // Break once we hit the end of the range.
+            if (last_addr <= vma_last) {
+                break;
+            }
+
+            // Advance to the next block.
+            cur_addr = vma_end;
+            iter = FindVMA(cur_addr);
+            ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end");
+        }
+    }
+
+    // If we failed, unmap memory.
+    if (result.IsError()) {
+        for (const auto [unmap_address, unmap_size] : mapped_regions) {
+            ASSERT_MSG(UnmapRange(unmap_address, unmap_size).IsSuccess(),
+                       "MapPhysicalMemory un-map on error");
+        }
+
+        return result;
+    }
+
+    // Update amount of mapped physical memory.
+    physical_memory_mapped += size - mapped_size;
+
+    return RESULT_SUCCESS;
+}
+
+ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) {
+    const auto end_addr = target + size;
+    const auto last_addr = end_addr - 1;
+    VAddr cur_addr = target;
+
+    ResultCode result = RESULT_SUCCESS;
+
+    // Check how much memory is currently mapped.
+    const auto mapped_size_result = SizeOfUnmappablePhysicalMemoryInRange(target, size);
+    if (mapped_size_result.Failed()) {
+        return mapped_size_result.Code();
+    }
+
+    // If we've already unmapped all the memory, return early.
+    const std::size_t mapped_size = *mapped_size_result;
+    if (mapped_size == 0) {
+        return RESULT_SUCCESS;
+    }
+
+    // Keep track of the memory regions we unmap.
+    std::vector<std::pair<u64, u64>> unmapped_regions;
+
+    // Try to unmap regions.
+    {
+        cur_addr = target;
+
+        auto iter = FindVMA(target);
+        ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end");
+
+        while (true) {
+            const auto& vma = iter->second;
+            const auto vma_start = vma.base;
+            const auto vma_end = vma_start + vma.size;
+            const auto vma_last = vma_end - 1;
+
+            // Unmap the memory block
+            const auto unmap_size = std::min(end_addr - cur_addr, vma_end - cur_addr);
+            if (vma.state == MemoryState::Heap) {
+                result = UnmapRange(cur_addr, unmap_size);
+                if (result.IsError()) {
+                    break;
+                }
+
+                unmapped_regions.emplace_back(cur_addr, unmap_size);
+            }
+
+            // Break once we hit the end of the range.
+            if (last_addr <= vma_last) {
+                break;
+            }
+
+            // Advance to the next block.
+            cur_addr = vma_end;
+            iter = FindVMA(cur_addr);
+            ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end");
+        }
+    }
+
+    // If we failed, re-map regions.
+    // TODO: Preserve memory contents?
+    if (result.IsError()) {
+        for (const auto [map_address, map_size] : unmapped_regions) {
+            const auto remap_res =
+                MapMemoryBlock(map_address, std::make_shared<PhysicalMemory>(map_size, 0), 0,
+                               map_size, MemoryState::Heap, VMAPermission::None);
+            ASSERT_MSG(remap_res.Succeeded(), "UnmapPhysicalMemory re-map on error");
+        }
+    }
+
+    // Update mapped amount
+    physical_memory_mapped -= mapped_size;
+
+    return RESULT_SUCCESS;
+}
+
 ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) {
     constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped;
     const auto src_check_result = CheckRangeState(
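
Both functions above share a record-and-roll-back structure: walk the VMA list, apply the operation to each eligible region, remember what succeeded, and undo everything if any step fails. A generic sketch of that pattern with illustrative types (the real code rolls back inline rather than through callbacks):

```cpp
#include <cstdint>
#include <functional>
#include <utility>
#include <vector>

// Apply an operation region by region, remembering successes so a failure
// partway through can undo them (here: in reverse order).
bool ApplyWithRollback(const std::vector<std::pair<std::uint64_t, std::uint64_t>>& regions,
                       const std::function<bool(std::uint64_t, std::uint64_t)>& apply,
                       const std::function<void(std::uint64_t, std::uint64_t)>& undo) {
    std::vector<std::pair<std::uint64_t, std::uint64_t>> done;
    for (const auto& [address, size] : regions) {
        if (!apply(address, size)) {
            for (auto it = done.rbegin(); it != done.rend(); ++it) {
                undo(it->first, it->second);
            }
            return false;
        }
        done.emplace_back(address, size);
    }
    return true;
}
```
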
@@ -447,7 +594,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem
     ASSERT_MSG(vma_offset + size <= vma->second.size,
                "Shared memory exceeds bounds of mapped block");
 
-    const std::shared_ptr<std::vector<u8>>& backing_block = vma->second.backing_block;
+    const std::shared_ptr<PhysicalMemory>& backing_block = vma->second.backing_block;
     const std::size_t backing_block_offset = vma->second.offset + vma_offset;
 
     CASCADE_RESULT(auto new_vma,
@@ -455,12 +602,12 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem
     // Protect mirror with permissions from old region
     Reprotect(new_vma, vma->second.permissions);
     // Remove permissions from old region
-    Reprotect(vma, VMAPermission::None);
+    ReprotectRange(src_addr, size, VMAPermission::None);
 
     return RESULT_SUCCESS;
 }
 
-void VMManager::RefreshMemoryBlockMappings(const std::vector<u8>* block) {
+void VMManager::RefreshMemoryBlockMappings(const PhysicalMemory* block) {
     // If this ever proves to have a noticeable performance impact, allow users of the function to
     // specify a specific range of addresses to limit the scan to.
     for (const auto& p : vma_map) {
@@ -588,14 +735,14 @@ VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) {
 VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) {
     const VMAIter next_vma = std::next(iter);
     if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) {
-        iter->second.size += next_vma->second.size;
+        MergeAdjacentVMA(iter->second, next_vma->second);
         vma_map.erase(next_vma);
     }
 
     if (iter != vma_map.begin()) {
         VMAIter prev_vma = std::prev(iter);
         if (prev_vma->second.CanBeMergedWith(iter->second)) {
-            prev_vma->second.size += iter->second.size;
+            MergeAdjacentVMA(prev_vma->second, iter->second);
             vma_map.erase(iter);
             iter = prev_vma;
         }
@@ -604,6 +751,38 @@ VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) {
     return iter;
 }
 
+void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right) {
+    ASSERT(left.CanBeMergedWith(right));
+
+    // Always merge allocated memory blocks, even when they don't share the same backing block.
+    if (left.type == VMAType::AllocatedMemoryBlock &&
+        (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) {
+        // Check if we can save work.
+        if (left.offset == 0 && left.size == left.backing_block->size()) {
+            // Fast case: left is an entire backing block.
+            left.backing_block->insert(left.backing_block->end(),
+                                       right.backing_block->begin() + right.offset,
+                                       right.backing_block->begin() + right.offset + right.size);
+        } else {
+            // Slow case: make a new memory block for left and right.
+            auto new_memory = std::make_shared<PhysicalMemory>();
+            new_memory->insert(new_memory->end(), left.backing_block->begin() + left.offset,
+                               left.backing_block->begin() + left.offset + left.size);
+            new_memory->insert(new_memory->end(), right.backing_block->begin() + right.offset,
+                               right.backing_block->begin() + right.offset + right.size);
+            left.backing_block = new_memory;
+            left.offset = 0;
+        }
+
+        // Page table update is needed, because backing memory changed.
+        left.size += right.size;
+        UpdatePageTableForVMA(left);
+    } else {
+        // Just update the size.
+        left.size += right.size;
+    }
+}
+
 void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
     switch (vma.type) {
     case VMAType::Free:
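
MergeAdjacentVMA distinguishes a fast and a slow path when the two blocks do not already share contiguous backing storage: if the left VMA spans its entire backing block, the right block's bytes are appended in place; otherwise both ranges are copied into a fresh block. A toy model using plain byte vectors in place of Kernel::PhysicalMemory:

```cpp
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

using Block = std::vector<std::uint8_t>;

struct ToyVMA {
    std::shared_ptr<Block> backing;
    std::size_t offset = 0;
    std::size_t size = 0;
};

void MergeToy(ToyVMA& left, const ToyVMA& right) {
    if (left.offset == 0 && left.size == left.backing->size()) {
        // Fast path: left owns its whole block, so just append right's bytes.
        left.backing->insert(left.backing->end(), right.backing->begin() + right.offset,
                             right.backing->begin() + right.offset + right.size);
    } else {
        // Slow path: build a fresh block holding both ranges.
        auto merged = std::make_shared<Block>();
        merged->insert(merged->end(), left.backing->begin() + left.offset,
                       left.backing->begin() + left.offset + left.size);
        merged->insert(merged->end(), right.backing->begin() + right.offset,
                       right.backing->begin() + right.offset + right.size);
        left.backing = std::move(merged);
        left.offset = 0;
    }
    left.size += right.size;
}
```
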
@@ -778,6 +957,84 @@ VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, Memo
         std::make_tuple(initial_state, initial_permissions, initial_attributes & ~ignore_mask));
 }
 
+ResultVal<std::size_t> VMManager::SizeOfAllocatedVMAsInRange(VAddr address,
+                                                             std::size_t size) const {
+    const VAddr end_addr = address + size;
+    const VAddr last_addr = end_addr - 1;
+    std::size_t mapped_size = 0;
+
+    VAddr cur_addr = address;
+    auto iter = FindVMA(cur_addr);
+    ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end");
+
+    while (true) {
+        const auto& vma = iter->second;
+        const VAddr vma_start = vma.base;
+        const VAddr vma_end = vma_start + vma.size;
+        const VAddr vma_last = vma_end - 1;
+
+        // Add size if relevant.
+        if (vma.state != MemoryState::Unmapped) {
+            mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr);
+        }
+
+        // Break once we hit the end of the range.
+        if (last_addr <= vma_last) {
+            break;
+        }
+
+        // Advance to the next block.
+        cur_addr = vma_end;
+        iter = std::next(iter);
+        ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end");
+    }
+
+    return MakeResult(mapped_size);
+}
+
+ResultVal<std::size_t> VMManager::SizeOfUnmappablePhysicalMemoryInRange(VAddr address,
+                                                                        std::size_t size) const {
+    const VAddr end_addr = address + size;
+    const VAddr last_addr = end_addr - 1;
+    std::size_t mapped_size = 0;
+
+    VAddr cur_addr = address;
+    auto iter = FindVMA(cur_addr);
+    ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end");
+
+    while (true) {
+        const auto& vma = iter->second;
+        const auto vma_start = vma.base;
+        const auto vma_end = vma_start + vma.size;
+        const auto vma_last = vma_end - 1;
+        const auto state = vma.state;
+        const auto attr = vma.attribute;
+
+        // Memory within region must be free or mapped heap.
+        if (!((state == MemoryState::Heap && attr == MemoryAttribute::None) ||
+              (state == MemoryState::Unmapped))) {
+            return ERR_INVALID_ADDRESS_STATE;
+        }
+
+        // Add size if relevant.
+        if (state != MemoryState::Unmapped) {
+            mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr);
+        }
+
+        // Break once we hit the end of the range.
+        if (last_addr <= vma_last) {
+            break;
+        }
+
+        // Advance to the next block.
+        cur_addr = vma_end;
+        iter = std::next(iter);
+        ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end");
+    }
+
+    return MakeResult(mapped_size);
+}
+
 u64 VMManager::GetTotalPhysicalMemoryAvailable() const {
     LOG_WARNING(Kernel, "(STUBBED) called");
     return 0xF8000000;
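
Both range walkers accumulate, per VMA, the portion of the queried range [address, address + size) that overlaps it; the min() clamps to whichever ends first, the query or the VMA. A tiny compile-time worked case:

```cpp
#include <algorithm>
#include <cstdint>

constexpr std::uint64_t query_start = 0x3000, query_size = 0x3000;  // query [0x3000, 0x6000)
constexpr std::uint64_t vma_start = 0x2000, vma_end = 0x5000;       // one mapped VMA
constexpr std::uint64_t cur_addr = std::max(query_start, vma_start);
constexpr std::uint64_t contribution =
    std::min(query_start + query_size - cur_addr, vma_end - cur_addr);
static_assert(contribution == 0x2000);  // bytes of the query covered by this VMA
```
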
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 9fe6ac3f4..b18cde619 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -11,6 +11,7 @@
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/memory_hook.h" 12#include "common/memory_hook.h"
13#include "common/page_table.h" 13#include "common/page_table.h"
14#include "core/hle/kernel/physical_memory.h"
14#include "core/hle/result.h" 15#include "core/hle/result.h"
15#include "core/memory.h" 16#include "core/memory.h"
16 17
@@ -290,7 +291,7 @@ struct VirtualMemoryArea {
 
     // Settings for type = AllocatedMemoryBlock
     /// Memory block backing this VMA.
-    std::shared_ptr<std::vector<u8>> backing_block = nullptr;
+    std::shared_ptr<PhysicalMemory> backing_block = nullptr;
     /// Offset into the backing_memory the mapping starts from.
     std::size_t offset = 0;
 
@@ -348,8 +349,9 @@ public:
      * @param size Size of the mapping.
      * @param state MemoryState tag to attach to the VMA.
      */
-    ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block,
-                                        std::size_t offset, u64 size, MemoryState state);
+    ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<PhysicalMemory> block,
+                                        std::size_t offset, u64 size, MemoryState state,
+                                        VMAPermission perm = VMAPermission::ReadWrite);
 
     /**
      * Maps an unmanaged host memory pointer at a given address.
@@ -450,6 +452,34 @@ public:
     ///
     ResultVal<VAddr> SetHeapSize(u64 size);
 
+    /// Maps memory at a given address.
+    ///
+    /// @param addr The virtual address to map memory at.
+    /// @param size The amount of memory to map.
+    ///
+    /// @note The destination address must lie within the Map region.
+    ///
+    /// @note This function requires that SystemResourceSize be non-zero,
+    ///       however, this is just because if it were not then the
+    ///       resulting page tables could be exploited on hardware by
+    ///       a malicious program. SystemResource usage does not need
+    ///       to be explicitly checked or updated here.
+    ResultCode MapPhysicalMemory(VAddr target, u64 size);
+
+    /// Unmaps memory at a given address.
+    ///
+    /// @param addr The virtual address to unmap memory at.
+    /// @param size The amount of memory to unmap.
+    ///
+    /// @note The destination address must lie within the Map region.
+    ///
+    /// @note This function requires that SystemResourceSize be non-zero,
+    ///       however, this is just because if it were not then the
+    ///       resulting page tables could be exploited on hardware by
+    ///       a malicious program. SystemResource usage does not need
+    ///       to be explicitly checked or updated here.
+    ResultCode UnmapPhysicalMemory(VAddr target, u64 size);
+
     /// Maps a region of memory as code memory.
     ///
     /// @param dst_address The base address of the region to create the aliasing memory region.
@@ -518,7 +548,7 @@ public:
      * Scans all VMAs and updates the page table range of any that use the given vector as backing
      * memory. This should be called after any operation that causes reallocation of the vector.
      */
-    void RefreshMemoryBlockMappings(const std::vector<u8>* block);
+    void RefreshMemoryBlockMappings(const PhysicalMemory* block);
 
     /// Dumps the address space layout to the log, for debugging
     void LogLayout() const;
@@ -657,6 +687,11 @@ private:
      */
     VMAIter MergeAdjacent(VMAIter vma);
 
+    /**
+     * Merges two adjacent VMAs.
+     */
+    void MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right);
+
     /// Updates the pages corresponding to this VMA so they match the VMA's attributes.
     void UpdatePageTableForVMA(const VirtualMemoryArea& vma);
 
@@ -701,6 +736,13 @@ private:
                                 MemoryAttribute attribute_mask, MemoryAttribute attribute,
                                 MemoryAttribute ignore_mask) const;
 
+    /// Gets the amount of memory currently mapped (state != Unmapped) in a range.
+    ResultVal<std::size_t> SizeOfAllocatedVMAsInRange(VAddr address, std::size_t size) const;
+
+    /// Gets the amount of memory unmappable by UnmapPhysicalMemory in a range.
+    ResultVal<std::size_t> SizeOfUnmappablePhysicalMemoryInRange(VAddr address,
+                                                                 std::size_t size) const;
+
     /**
      * A map covering the entirety of the managed address space, keyed by the `base` field of each
      * VMA. It must always be modified by splitting or merging VMAs, so that the invariant
@@ -736,12 +778,17 @@ private:
736 // the entire virtual address space extents that bound the allocations, including any holes. 778 // the entire virtual address space extents that bound the allocations, including any holes.
737 // This makes deallocation and reallocation of holes fast and keeps process memory contiguous 779 // This makes deallocation and reallocation of holes fast and keeps process memory contiguous
738 // in the emulator address space, allowing Memory::GetPointer to be reasonably safe. 780 // in the emulator address space, allowing Memory::GetPointer to be reasonably safe.
739 std::shared_ptr<std::vector<u8>> heap_memory; 781 std::shared_ptr<PhysicalMemory> heap_memory;
740 782
741 // The end of the currently allocated heap. This is not an inclusive 783 // The end of the currently allocated heap. This is not an inclusive
742 // end of the range. This is essentially 'base_address + current_size'. 784 // end of the range. This is essentially 'base_address + current_size'.
743 VAddr heap_end = 0; 785 VAddr heap_end = 0;
744 786
787 // The current amount of memory mapped via MapPhysicalMemory.
788 // This is used here (and in Nintendo's kernel) only for debugging, and does not impact
789 // any behavior.
790 u64 physical_memory_mapped = 0;
791
745 Core::System& system; 792 Core::System& system;
746}; 793};
747} // namespace Kernel 794} // namespace Kernel
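The two @note blocks above pin the same precondition on both calls: the target range must fall inside the Map region and SystemResourceSize must be non-zero. A minimal sketch of that caller-side check, using simplified stand-in types (MapRegion and CanMapPhysicalMemory are illustrative, not yuzu's VMManager API):

#include <cstdint>

using VAddr = std::uint64_t;

// Stand-in for the process's Map region extents.
struct MapRegion {
    VAddr begin;
    VAddr end; // exclusive
};

// True when [target, target + size) lies fully inside the Map region and the
// process advertises a non-zero system resource size, mirroring the documented
// requirements of Map/UnmapPhysicalMemory. Expressed without forming
// target + size so the check cannot wrap around.
bool CanMapPhysicalMemory(const MapRegion& map, std::uint64_t system_resource_size,
                          VAddr target, std::uint64_t size) {
    const bool in_region =
        target >= map.begin && target <= map.end && size <= map.end - target;
    return in_region && system_resource_size != 0;
}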
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 9fdcf2965..a192a1f5f 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -266,8 +266,8 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
266 {65, nullptr, "ReportUserIsActive"}, 266 {65, nullptr, "ReportUserIsActive"},
267 {66, nullptr, "GetCurrentIlluminance"}, 267 {66, nullptr, "GetCurrentIlluminance"},
268 {67, nullptr, "IsIlluminanceAvailable"}, 268 {67, nullptr, "IsIlluminanceAvailable"},
269 {68, nullptr, "SetAutoSleepDisabled"}, 269 {68, &ISelfController::SetAutoSleepDisabled, "SetAutoSleepDisabled"},
270 {69, nullptr, "IsAutoSleepDisabled"}, 270 {69, &ISelfController::IsAutoSleepDisabled, "IsAutoSleepDisabled"},
271 {70, nullptr, "ReportMultimediaError"}, 271 {70, nullptr, "ReportMultimediaError"},
272 {71, nullptr, "GetCurrentIlluminanceEx"}, 272 {71, nullptr, "GetCurrentIlluminanceEx"},
273 {80, nullptr, "SetWirelessPriorityMode"}, 273 {80, nullptr, "SetWirelessPriorityMode"},
@@ -454,6 +454,34 @@ void ISelfController::GetIdleTimeDetectionExtension(Kernel::HLERequestContext& c
454 rb.Push<u32>(idle_time_detection_extension); 454 rb.Push<u32>(idle_time_detection_extension);
455} 455}
456 456
457void ISelfController::SetAutoSleepDisabled(Kernel::HLERequestContext& ctx) {
458 IPC::RequestParser rp{ctx};
459 is_auto_sleep_disabled = rp.Pop<bool>();
460
461 // On the system itself, if the previous state of is_auto_sleep_disabled
462 // differed from the current value passed in, it'd signify the internal
463 // window manager to update (and also increment some statistics like update counts)
464 //
465 // It'd also indicate this change to an idle handling context.
466 //
467 // However, given we're emulating this behavior, most of this can be ignored
468 // and it's sufficient to simply set the member variable for querying via
469 // IsAutoSleepDisabled().
470
471 LOG_DEBUG(Service_AM, "called. is_auto_sleep_disabled={}", is_auto_sleep_disabled);
472
473 IPC::ResponseBuilder rb{ctx, 2};
474 rb.Push(RESULT_SUCCESS);
475}
476
477void ISelfController::IsAutoSleepDisabled(Kernel::HLERequestContext& ctx) {
478 LOG_DEBUG(Service_AM, "called.");
479
480 IPC::ResponseBuilder rb{ctx, 3};
481 rb.Push(RESULT_SUCCESS);
482 rb.Push(is_auto_sleep_disabled);
483}
484
457void ISelfController::GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx) { 485void ISelfController::GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx) {
458 LOG_DEBUG(Service_AM, "called."); 486 LOG_DEBUG(Service_AM, "called.");
459 487
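The comment inside SetAutoSleepDisabled describes the real system reacting only to transitions of the flag: signaling the window manager, bumping update statistics, and informing an idle-handling context. A condensed sketch of that bookkeeping, with hypothetical names (AutoSleepTracker and notify_window_manager are illustrative, not part of yuzu or the Switch OS):

#include <cstdint>
#include <functional>

struct AutoSleepTracker {
    bool disabled = false;
    std::uint64_t update_count = 0;
    std::function<void(bool)> notify_window_manager; // hypothetical observer

    void Set(bool new_value) {
        if (new_value == disabled) {
            return; // Only transitions are interesting, per the comment above.
        }
        disabled = new_value;
        ++update_count;
        if (notify_window_manager) {
            notify_window_manager(disabled);
        }
    }
};

Under emulation none of this is observable by the guest, which is why the handler above only stores the flag for IsAutoSleepDisabled to return.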
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index 14b010164..6cb582483 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -133,6 +133,8 @@ private:
133 void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx); 133 void SetHandlesRequestToDisplay(Kernel::HLERequestContext& ctx);
134 void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); 134 void SetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx);
135 void GetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx); 135 void GetIdleTimeDetectionExtension(Kernel::HLERequestContext& ctx);
136 void SetAutoSleepDisabled(Kernel::HLERequestContext& ctx);
137 void IsAutoSleepDisabled(Kernel::HLERequestContext& ctx);
136 void GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx); 138 void GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx);
137 void GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx); 139 void GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx);
138 140
@@ -142,6 +144,7 @@ private:
142 144
143 u32 idle_time_detection_extension = 0; 145 u32 idle_time_detection_extension = 0;
144 u64 num_fatal_sections_entered = 0; 146 u64 num_fatal_sections_entered = 0;
147 bool is_auto_sleep_disabled = false;
145}; 148};
146 149
147class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> { 150class ICommonStateGetter final : public ServiceFramework<ICommonStateGetter> {
diff --git a/src/core/hle/service/audio/audio.cpp b/src/core/hle/service/audio/audio.cpp
index 128df7db5..1781bec83 100644
--- a/src/core/hle/service/audio/audio.cpp
+++ b/src/core/hle/service/audio/audio.cpp
@@ -19,16 +19,16 @@
19 19
20namespace Service::Audio { 20namespace Service::Audio {
21 21
22void InstallInterfaces(SM::ServiceManager& service_manager) { 22void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system) {
23 std::make_shared<AudCtl>()->InstallAsService(service_manager); 23 std::make_shared<AudCtl>()->InstallAsService(service_manager);
24 std::make_shared<AudOutA>()->InstallAsService(service_manager); 24 std::make_shared<AudOutA>()->InstallAsService(service_manager);
25 std::make_shared<AudOutU>()->InstallAsService(service_manager); 25 std::make_shared<AudOutU>(system)->InstallAsService(service_manager);
26 std::make_shared<AudInA>()->InstallAsService(service_manager); 26 std::make_shared<AudInA>()->InstallAsService(service_manager);
27 std::make_shared<AudInU>()->InstallAsService(service_manager); 27 std::make_shared<AudInU>()->InstallAsService(service_manager);
28 std::make_shared<AudRecA>()->InstallAsService(service_manager); 28 std::make_shared<AudRecA>()->InstallAsService(service_manager);
29 std::make_shared<AudRecU>()->InstallAsService(service_manager); 29 std::make_shared<AudRecU>()->InstallAsService(service_manager);
30 std::make_shared<AudRenA>()->InstallAsService(service_manager); 30 std::make_shared<AudRenA>()->InstallAsService(service_manager);
31 std::make_shared<AudRenU>()->InstallAsService(service_manager); 31 std::make_shared<AudRenU>(system)->InstallAsService(service_manager);
32 std::make_shared<CodecCtl>()->InstallAsService(service_manager); 32 std::make_shared<CodecCtl>()->InstallAsService(service_manager);
33 std::make_shared<HwOpus>()->InstallAsService(service_manager); 33 std::make_shared<HwOpus>()->InstallAsService(service_manager);
34 34
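This hunk is one instance of a pattern repeated throughout the commit: services stop reaching for Core::System::GetInstance() and instead receive a Core::System& at construction. A reduced sketch of the shape (System and Service here are placeholders, not yuzu types):

#include <memory>

struct System {}; // stand-in for Core::System

class Service {
public:
    // The dependency arrives through the constructor rather than a global
    // lookup, so lifetime and ownership are explicit at the call site.
    explicit Service(System& system) : system{system} {}

private:
    System& system; // valid for the service's lifetime
};

void InstallAll(System& system) {
    // Mirrors InstallInterfaces above: construction sites forward the
    // reference only to the services that actually need it.
    const auto service = std::make_shared<Service>(system);
    (void)service;
}

Besides removing hidden global state, this makes it visible from the signatures alone which services depend on the core system at all.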
diff --git a/src/core/hle/service/audio/audio.h b/src/core/hle/service/audio/audio.h
index f5bd3bf5f..b6d13912e 100644
--- a/src/core/hle/service/audio/audio.h
+++ b/src/core/hle/service/audio/audio.h
@@ -4,6 +4,10 @@
4 4
5#pragma once 5#pragma once
6 6
7namespace Core {
8class System;
9}
10
7namespace Service::SM { 11namespace Service::SM {
8class ServiceManager; 12class ServiceManager;
9} 13}
@@ -11,6 +15,6 @@ class ServiceManager;
11namespace Service::Audio { 15namespace Service::Audio {
12 16
13/// Registers all Audio services with the specified service manager. 17/// Registers all Audio services with the specified service manager.
14void InstallInterfaces(SM::ServiceManager& service_manager); 18void InstallInterfaces(SM::ServiceManager& service_manager, Core::System& system);
15 19
16} // namespace Service::Audio 20} // namespace Service::Audio
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 7db6eb08d..fb84a8f13 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -40,8 +40,8 @@ enum class AudioState : u32 {
40 40
41class IAudioOut final : public ServiceFramework<IAudioOut> { 41class IAudioOut final : public ServiceFramework<IAudioOut> {
42public: 42public:
43 IAudioOut(AudoutParams audio_params, AudioCore::AudioOut& audio_core, std::string&& device_name, 43 IAudioOut(Core::System& system, AudoutParams audio_params, AudioCore::AudioOut& audio_core,
44 std::string&& unique_name) 44 std::string&& device_name, std::string&& unique_name)
45 : ServiceFramework("IAudioOut"), audio_core(audio_core), 45 : ServiceFramework("IAudioOut"), audio_core(audio_core),
46 device_name(std::move(device_name)), audio_params(audio_params) { 46 device_name(std::move(device_name)), audio_params(audio_params) {
47 // clang-format off 47 // clang-format off
@@ -65,7 +65,6 @@ public:
65 RegisterHandlers(functions); 65 RegisterHandlers(functions);
66 66
67 // This is the event handle used to check if the audio buffer was released 67 // This is the event handle used to check if the audio buffer was released
68 auto& system = Core::System::GetInstance();
69 buffer_event = Kernel::WritableEvent::CreateEventPair( 68 buffer_event = Kernel::WritableEvent::CreateEventPair(
70 system.Kernel(), Kernel::ResetType::Manual, "IAudioOutBufferReleased"); 69 system.Kernel(), Kernel::ResetType::Manual, "IAudioOutBufferReleased");
71 70
@@ -212,6 +211,22 @@ private:
212 Kernel::EventPair buffer_event; 211 Kernel::EventPair buffer_event;
213}; 212};
214 213
214AudOutU::AudOutU(Core::System& system_) : ServiceFramework("audout:u"), system{system_} {
215 // clang-format off
216 static const FunctionInfo functions[] = {
217 {0, &AudOutU::ListAudioOutsImpl, "ListAudioOuts"},
218 {1, &AudOutU::OpenAudioOutImpl, "OpenAudioOut"},
219 {2, &AudOutU::ListAudioOutsImpl, "ListAudioOutsAuto"},
220 {3, &AudOutU::OpenAudioOutImpl, "OpenAudioOutAuto"},
221 };
222 // clang-format on
223
224 RegisterHandlers(functions);
225 audio_core = std::make_unique<AudioCore::AudioOut>();
226}
227
228AudOutU::~AudOutU() = default;
229
215void AudOutU::ListAudioOutsImpl(Kernel::HLERequestContext& ctx) { 230void AudOutU::ListAudioOutsImpl(Kernel::HLERequestContext& ctx) {
216 LOG_DEBUG(Service_Audio, "called"); 231 LOG_DEBUG(Service_Audio, "called");
217 232
@@ -248,7 +263,7 @@ void AudOutU::OpenAudioOutImpl(Kernel::HLERequestContext& ctx) {
248 263
249 std::string unique_name{fmt::format("{}-{}", device_name, audio_out_interfaces.size())}; 264 std::string unique_name{fmt::format("{}-{}", device_name, audio_out_interfaces.size())};
250 auto audio_out_interface = std::make_shared<IAudioOut>( 265 auto audio_out_interface = std::make_shared<IAudioOut>(
251 params, *audio_core, std::move(device_name), std::move(unique_name)); 266 system, params, *audio_core, std::move(device_name), std::move(unique_name));
252 267
253 IPC::ResponseBuilder rb{ctx, 6, 0, 1}; 268 IPC::ResponseBuilder rb{ctx, 6, 0, 1};
254 rb.Push(RESULT_SUCCESS); 269 rb.Push(RESULT_SUCCESS);
@@ -256,20 +271,9 @@ void AudOutU::OpenAudioOutImpl(Kernel::HLERequestContext& ctx) {
256 rb.Push<u32>(params.channel_count); 271 rb.Push<u32>(params.channel_count);
257 rb.Push<u32>(static_cast<u32>(AudioCore::Codec::PcmFormat::Int16)); 272 rb.Push<u32>(static_cast<u32>(AudioCore::Codec::PcmFormat::Int16));
258 rb.Push<u32>(static_cast<u32>(AudioState::Stopped)); 273 rb.Push<u32>(static_cast<u32>(AudioState::Stopped));
259 rb.PushIpcInterface<Audio::IAudioOut>(audio_out_interface); 274 rb.PushIpcInterface<IAudioOut>(audio_out_interface);
260 275
261 audio_out_interfaces.push_back(std::move(audio_out_interface)); 276 audio_out_interfaces.push_back(std::move(audio_out_interface));
262} 277}
263 278
264AudOutU::AudOutU() : ServiceFramework("audout:u") {
265 static const FunctionInfo functions[] = {{0, &AudOutU::ListAudioOutsImpl, "ListAudioOuts"},
266 {1, &AudOutU::OpenAudioOutImpl, "OpenAudioOut"},
267 {2, &AudOutU::ListAudioOutsImpl, "ListAudioOutsAuto"},
268 {3, &AudOutU::OpenAudioOutImpl, "OpenAudioOutAuto"}};
269 RegisterHandlers(functions);
270 audio_core = std::make_unique<AudioCore::AudioOut>();
271}
272
273AudOutU::~AudOutU() = default;
274
275} // namespace Service::Audio 279} // namespace Service::Audio
diff --git a/src/core/hle/service/audio/audout_u.h b/src/core/hle/service/audio/audout_u.h
index aed4c43b2..c9f532ccd 100644
--- a/src/core/hle/service/audio/audout_u.h
+++ b/src/core/hle/service/audio/audout_u.h
@@ -11,6 +11,10 @@ namespace AudioCore {
11class AudioOut; 11class AudioOut;
12} 12}
13 13
14namespace Core {
15class System;
16}
17
14namespace Kernel { 18namespace Kernel {
15class HLERequestContext; 19class HLERequestContext;
16} 20}
@@ -21,15 +25,17 @@ class IAudioOut;
21 25
22class AudOutU final : public ServiceFramework<AudOutU> { 26class AudOutU final : public ServiceFramework<AudOutU> {
23public: 27public:
24 AudOutU(); 28 explicit AudOutU(Core::System& system_);
25 ~AudOutU() override; 29 ~AudOutU() override;
26 30
27private: 31private:
32 void ListAudioOutsImpl(Kernel::HLERequestContext& ctx);
33 void OpenAudioOutImpl(Kernel::HLERequestContext& ctx);
34
28 std::vector<std::shared_ptr<IAudioOut>> audio_out_interfaces; 35 std::vector<std::shared_ptr<IAudioOut>> audio_out_interfaces;
29 std::unique_ptr<AudioCore::AudioOut> audio_core; 36 std::unique_ptr<AudioCore::AudioOut> audio_core;
30 37
31 void ListAudioOutsImpl(Kernel::HLERequestContext& ctx); 38 Core::System& system;
32 void OpenAudioOutImpl(Kernel::HLERequestContext& ctx);
33}; 39};
34 40
35} // namespace Service::Audio 41} // namespace Service::Audio
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 3711e1ea1..5b0b7f17e 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -5,6 +5,7 @@
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7#include <memory> 7#include <memory>
8#include <string_view>
8 9
9#include "audio_core/audio_renderer.h" 10#include "audio_core/audio_renderer.h"
10#include "common/alignment.h" 11#include "common/alignment.h"
@@ -25,7 +26,8 @@ namespace Service::Audio {
25 26
26class IAudioRenderer final : public ServiceFramework<IAudioRenderer> { 27class IAudioRenderer final : public ServiceFramework<IAudioRenderer> {
27public: 28public:
28 explicit IAudioRenderer(AudioCore::AudioRendererParameter audren_params) 29 explicit IAudioRenderer(Core::System& system, AudioCore::AudioRendererParameter audren_params,
30 const std::size_t instance_number)
29 : ServiceFramework("IAudioRenderer") { 31 : ServiceFramework("IAudioRenderer") {
30 // clang-format off 32 // clang-format off
31 static const FunctionInfo functions[] = { 33 static const FunctionInfo functions[] = {
@@ -45,11 +47,10 @@ public:
45 // clang-format on 47 // clang-format on
46 RegisterHandlers(functions); 48 RegisterHandlers(functions);
47 49
48 auto& system = Core::System::GetInstance();
49 system_event = Kernel::WritableEvent::CreateEventPair( 50 system_event = Kernel::WritableEvent::CreateEventPair(
50 system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent"); 51 system.Kernel(), Kernel::ResetType::Manual, "IAudioRenderer:SystemEvent");
51 renderer = std::make_unique<AudioCore::AudioRenderer>(system.CoreTiming(), audren_params, 52 renderer = std::make_unique<AudioCore::AudioRenderer>(
52 system_event.writable); 53 system.CoreTiming(), audren_params, system_event.writable, instance_number);
53 } 54 }
54 55
55private: 56private:
@@ -159,7 +160,8 @@ private:
159 160
160class IAudioDevice final : public ServiceFramework<IAudioDevice> { 161class IAudioDevice final : public ServiceFramework<IAudioDevice> {
161public: 162public:
162 IAudioDevice() : ServiceFramework("IAudioDevice") { 163 explicit IAudioDevice(Core::System& system, u32_le revision_num)
164 : ServiceFramework("IAudioDevice"), revision{revision_num} {
163 static const FunctionInfo functions[] = { 165 static const FunctionInfo functions[] = {
164 {0, &IAudioDevice::ListAudioDeviceName, "ListAudioDeviceName"}, 166 {0, &IAudioDevice::ListAudioDeviceName, "ListAudioDeviceName"},
165 {1, &IAudioDevice::SetAudioDeviceOutputVolume, "SetAudioDeviceOutputVolume"}, 167 {1, &IAudioDevice::SetAudioDeviceOutputVolume, "SetAudioDeviceOutputVolume"},
@@ -177,7 +179,7 @@ public:
177 }; 179 };
178 RegisterHandlers(functions); 180 RegisterHandlers(functions);
179 181
180 auto& kernel = Core::System::GetInstance().Kernel(); 182 auto& kernel = system.Kernel();
181 buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic, 183 buffer_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
182 "IAudioOutBufferReleasedEvent"); 184 "IAudioOutBufferReleasedEvent");
183 185
@@ -188,15 +190,47 @@ public:
188 } 190 }
189 191
190private: 192private:
193 using AudioDeviceName = std::array<char, 256>;
194 static constexpr std::array<std::string_view, 4> audio_device_names{{
195 "AudioStereoJackOutput",
196 "AudioBuiltInSpeakerOutput",
197 "AudioTvOutput",
198 "AudioUsbDeviceOutput",
199 }};
200 enum class DeviceType {
201 AHUBHeadphones,
202 AHUBSpeakers,
203 HDA,
204 USBOutput,
205 };
206
191 void ListAudioDeviceName(Kernel::HLERequestContext& ctx) { 207 void ListAudioDeviceName(Kernel::HLERequestContext& ctx) {
192 LOG_WARNING(Service_Audio, "(STUBBED) called"); 208 LOG_DEBUG(Service_Audio, "called");
193 209
194 constexpr std::array<char, 15> audio_interface{{"AudioInterface"}}; 210 const bool usb_output_supported =
195 ctx.WriteBuffer(audio_interface); 211 IsFeatureSupported(AudioFeatures::AudioUSBDeviceOutput, revision);
212 const std::size_t count = ctx.GetWriteBufferSize() / sizeof(AudioDeviceName);
213
214 std::vector<AudioDeviceName> name_buffer;
215 name_buffer.reserve(audio_device_names.size());
216
217 for (std::size_t i = 0; i < count && i < audio_device_names.size(); i++) {
218 const auto type = static_cast<DeviceType>(i);
219
220 if (!usb_output_supported && type == DeviceType::USBOutput) {
221 continue;
222 }
223
224 const auto& device_name = audio_device_names[i];
225 auto& entry = name_buffer.emplace_back();
226 device_name.copy(entry.data(), device_name.size());
227 }
228
229 ctx.WriteBuffer(name_buffer);
196 230
197 IPC::ResponseBuilder rb{ctx, 3}; 231 IPC::ResponseBuilder rb{ctx, 3};
198 rb.Push(RESULT_SUCCESS); 232 rb.Push(RESULT_SUCCESS);
199 rb.Push<u32>(1); 233 rb.Push(static_cast<u32>(name_buffer.size()));
200 } 234 }
201 235
202 void SetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) { 236 void SetAudioDeviceOutputVolume(Kernel::HLERequestContext& ctx) {
@@ -215,12 +249,16 @@ private:
215 void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) { 249 void GetActiveAudioDeviceName(Kernel::HLERequestContext& ctx) {
216 LOG_WARNING(Service_Audio, "(STUBBED) called"); 250 LOG_WARNING(Service_Audio, "(STUBBED) called");
217 251
218 constexpr std::array<char, 12> audio_interface{{"AudioDevice"}}; 252 // Currently set to always be TV audio output.
219 ctx.WriteBuffer(audio_interface); 253 const auto& device_name = audio_device_names[2];
220 254
221 IPC::ResponseBuilder rb{ctx, 3}; 255 AudioDeviceName out_device_name{};
256 device_name.copy(out_device_name.data(), device_name.size());
257
258 ctx.WriteBuffer(out_device_name);
259
260 IPC::ResponseBuilder rb{ctx, 2};
222 rb.Push(RESULT_SUCCESS); 261 rb.Push(RESULT_SUCCESS);
223 rb.Push<u32>(1);
224 } 262 }
225 263
226 void QueryAudioDeviceSystemEvent(Kernel::HLERequestContext& ctx) { 264 void QueryAudioDeviceSystemEvent(Kernel::HLERequestContext& ctx) {
@@ -249,12 +287,13 @@ private:
249 rb.PushCopyObjects(audio_output_device_switch_event.readable); 287 rb.PushCopyObjects(audio_output_device_switch_event.readable);
250 } 288 }
251 289
290 u32_le revision = 0;
252 Kernel::EventPair buffer_event; 291 Kernel::EventPair buffer_event;
253 Kernel::EventPair audio_output_device_switch_event; 292 Kernel::EventPair audio_output_device_switch_event;
254 293
255}; // class IAudioDevice 294}; // class IAudioDevice
256 295
257AudRenU::AudRenU() : ServiceFramework("audren:u") { 296AudRenU::AudRenU(Core::System& system_) : ServiceFramework("audren:u"), system{system_} {
258 // clang-format off 297 // clang-format off
259 static const FunctionInfo functions[] = { 298 static const FunctionInfo functions[] = {
260 {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"}, 299 {0, &AudRenU::OpenAudioRenderer, "OpenAudioRenderer"},
@@ -327,7 +366,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
327 }; 366 };
328 367
329 // Calculates the portion of the size related to the mix data (and the sorting thereof). 368 // Calculates the portion of the size related to the mix data (and the sorting thereof).
330 const auto calculate_mix_info_size = [this](const AudioCore::AudioRendererParameter& params) { 369 const auto calculate_mix_info_size = [](const AudioCore::AudioRendererParameter& params) {
331 // The size of the mixing info data structure. 370 // The size of the mixing info data structure.
332 constexpr u64 mix_info_size = 0x940; 371 constexpr u64 mix_info_size = 0x940;
333 372
@@ -399,7 +438,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
399 438
400 // Calculates the part of the size related to the splitter context. 439 // Calculates the part of the size related to the splitter context.
401 const auto calculate_splitter_context_size = 440 const auto calculate_splitter_context_size =
402 [this](const AudioCore::AudioRendererParameter& params) -> u64 { 441 [](const AudioCore::AudioRendererParameter& params) -> u64 {
403 if (!IsFeatureSupported(AudioFeatures::Splitter, params.revision)) { 442 if (!IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
404 return 0; 443 return 0;
405 } 444 }
@@ -446,7 +485,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
446 }; 485 };
447 486
448 // Calculates the part of the size related to performance statistics. 487 // Calculates the part of the size related to performance statistics.
449 const auto calculate_perf_size = [this](const AudioCore::AudioRendererParameter& params) { 488 const auto calculate_perf_size = [](const AudioCore::AudioRendererParameter& params) {
450 // Extra size value appended to the end of the calculation. 489 // Extra size value appended to the end of the calculation.
451 constexpr u64 appended = 128; 490 constexpr u64 appended = 128;
452 491
@@ -473,78 +512,76 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
473 }; 512 };
474 513
475 // Calculates the part of the size that relates to the audio command buffer. 514 // Calculates the part of the size that relates to the audio command buffer.
476 const auto calculate_command_buffer_size = 515 const auto calculate_command_buffer_size = [](const AudioCore::AudioRendererParameter& params) {
477 [this](const AudioCore::AudioRendererParameter& params) { 516 constexpr u64 alignment = (buffer_alignment_size - 1) * 2;
478 constexpr u64 alignment = (buffer_alignment_size - 1) * 2;
479 517
480 if (!IsFeatureSupported(AudioFeatures::VariadicCommandBuffer, params.revision)) { 518 if (!IsFeatureSupported(AudioFeatures::VariadicCommandBuffer, params.revision)) {
481 constexpr u64 command_buffer_size = 0x18000; 519 constexpr u64 command_buffer_size = 0x18000;
482 520
483 return command_buffer_size + alignment; 521 return command_buffer_size + alignment;
484 } 522 }
485 523
486 // When the variadic command buffer is supported, this means 524 // When the variadic command buffer is supported, this means
487 // the command generator for the audio renderer can issue commands 525 // the command generator for the audio renderer can issue commands
488 // that are (as one would expect) variable in size, so we need to 526 // that are (as one would expect) variable in size, so we need to
489 // determine the maximum possible size of a few command data structures 527 // determine the maximum possible size of a few command data structures
490 // and then multiply each by the number of such commands indicated by the 528 // and then multiply each by the number of such commands indicated by the
491 // respective audio parameters. 529 // respective audio parameters.
492 530
493 constexpr u64 max_biquad_filters = 2; 531 constexpr u64 max_biquad_filters = 2;
494 constexpr u64 max_mix_buffers = 24; 532 constexpr u64 max_mix_buffers = 24;
495 533
496 constexpr u64 biquad_filter_command_size = 0x2C; 534 constexpr u64 biquad_filter_command_size = 0x2C;
497 535
498 constexpr u64 depop_mix_command_size = 0x24; 536 constexpr u64 depop_mix_command_size = 0x24;
499 constexpr u64 depop_setup_command_size = 0x50; 537 constexpr u64 depop_setup_command_size = 0x50;
500 538
501 constexpr u64 effect_command_max_size = 0x540; 539 constexpr u64 effect_command_max_size = 0x540;
502 540
503 constexpr u64 mix_command_size = 0x1C; 541 constexpr u64 mix_command_size = 0x1C;
504 constexpr u64 mix_ramp_command_size = 0x24; 542 constexpr u64 mix_ramp_command_size = 0x24;
505 constexpr u64 mix_ramp_grouped_command_size = 0x13C; 543 constexpr u64 mix_ramp_grouped_command_size = 0x13C;
506 544
507 constexpr u64 perf_command_size = 0x28; 545 constexpr u64 perf_command_size = 0x28;
508 546
509 constexpr u64 sink_command_size = 0x130; 547 constexpr u64 sink_command_size = 0x130;
510 548
511 constexpr u64 submix_command_max_size = 549 constexpr u64 submix_command_max_size =
512 depop_mix_command_size + (mix_command_size * max_mix_buffers) * max_mix_buffers; 550 depop_mix_command_size + (mix_command_size * max_mix_buffers) * max_mix_buffers;
513 551
514 constexpr u64 volume_command_size = 0x1C; 552 constexpr u64 volume_command_size = 0x1C;
515 constexpr u64 volume_ramp_command_size = 0x20; 553 constexpr u64 volume_ramp_command_size = 0x20;
516 554
517 constexpr u64 voice_biquad_filter_command_size = 555 constexpr u64 voice_biquad_filter_command_size =
518 biquad_filter_command_size * max_biquad_filters; 556 biquad_filter_command_size * max_biquad_filters;
519 constexpr u64 voice_data_command_size = 0x9C; 557 constexpr u64 voice_data_command_size = 0x9C;
520 const u64 voice_command_max_size = 558 const u64 voice_command_max_size =
521 (params.splitter_count * depop_setup_command_size) + 559 (params.splitter_count * depop_setup_command_size) +
522 (voice_data_command_size + voice_biquad_filter_command_size + 560 (voice_data_command_size + voice_biquad_filter_command_size + volume_ramp_command_size +
523 volume_ramp_command_size + mix_ramp_grouped_command_size); 561 mix_ramp_grouped_command_size);
524 562
525 // Now calculate the individual elements that comprise the size and add them together. 563 // Now calculate the individual elements that comprise the size and add them together.
526 const u64 effect_commands_size = params.effect_count * effect_command_max_size; 564 const u64 effect_commands_size = params.effect_count * effect_command_max_size;
527 565
528 const u64 final_mix_commands_size = 566 const u64 final_mix_commands_size =
529 depop_mix_command_size + volume_command_size * max_mix_buffers; 567 depop_mix_command_size + volume_command_size * max_mix_buffers;
530 568
531 const u64 perf_commands_size = 569 const u64 perf_commands_size =
532 perf_command_size * 570 perf_command_size * (CalculateNumPerformanceEntries(params) + max_perf_detail_entries);
533 (CalculateNumPerformanceEntries(params) + max_perf_detail_entries);
534 571
535 const u64 sink_commands_size = params.sink_count * sink_command_size; 572 const u64 sink_commands_size = params.sink_count * sink_command_size;
536 573
537 const u64 splitter_commands_size = 574 const u64 splitter_commands_size =
538 params.num_splitter_send_channels * max_mix_buffers * mix_ramp_command_size; 575 params.num_splitter_send_channels * max_mix_buffers * mix_ramp_command_size;
539 576
540 const u64 submix_commands_size = params.submix_count * submix_command_max_size; 577 const u64 submix_commands_size = params.submix_count * submix_command_max_size;
541 578
542 const u64 voice_commands_size = params.voice_count * voice_command_max_size; 579 const u64 voice_commands_size = params.voice_count * voice_command_max_size;
543 580
544 return effect_commands_size + final_mix_commands_size + perf_commands_size + 581 return effect_commands_size + final_mix_commands_size + perf_commands_size +
545 sink_commands_size + splitter_commands_size + submix_commands_size + 582 sink_commands_size + splitter_commands_size + submix_commands_size +
546 voice_commands_size + alignment; 583 voice_commands_size + alignment;
547 }; 584 };
548 585
549 IPC::RequestParser rp{ctx}; 586 IPC::RequestParser rp{ctx};
550 const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>(); 587 const auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
@@ -577,12 +614,16 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
577} 614}
578 615
579void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) { 616void AudRenU::GetAudioDeviceService(Kernel::HLERequestContext& ctx) {
580 LOG_DEBUG(Service_Audio, "called"); 617 IPC::RequestParser rp{ctx};
618 const u64 aruid = rp.Pop<u64>();
581 619
582 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 620 LOG_DEBUG(Service_Audio, "called. aruid={:016X}", aruid);
583 621
622 // Revisionless variant of GetAudioDeviceServiceWithRevisionInfo that
623 // always assumes the initial release revision (REV1).
624 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
584 rb.Push(RESULT_SUCCESS); 625 rb.Push(RESULT_SUCCESS);
585 rb.PushIpcInterface<Audio::IAudioDevice>(); 626 rb.PushIpcInterface<IAudioDevice>(system, Common::MakeMagic('R', 'E', 'V', '1'));
586} 627}
587 628
588void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) { 629void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) {
@@ -592,13 +633,19 @@ void AudRenU::OpenAudioRendererAuto(Kernel::HLERequestContext& ctx) {
592} 633}
593 634
594void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) { 635void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) {
595 LOG_WARNING(Service_Audio, "(STUBBED) called"); 636 struct Parameters {
637 u32 revision;
638 u64 aruid;
639 };
596 640
597 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 641 IPC::RequestParser rp{ctx};
642 const auto [revision, aruid] = rp.PopRaw<Parameters>();
643
644 LOG_DEBUG(Service_Audio, "called. revision={:08X}, aruid={:016X}", revision, aruid);
598 645
646 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
599 rb.Push(RESULT_SUCCESS); 647 rb.Push(RESULT_SUCCESS);
600 rb.PushIpcInterface<Audio::IAudioDevice>(); // TODO(ogniK): Figure out what is different 648 rb.PushIpcInterface<IAudioDevice>(system, revision);
601 // based on the current revision
602} 649}
603 650
604void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) { 651void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
@@ -607,14 +654,16 @@ void AudRenU::OpenAudioRendererImpl(Kernel::HLERequestContext& ctx) {
607 IPC::ResponseBuilder rb{ctx, 2, 0, 1}; 654 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
608 655
609 rb.Push(RESULT_SUCCESS); 656 rb.Push(RESULT_SUCCESS);
610 rb.PushIpcInterface<IAudioRenderer>(params); 657 rb.PushIpcInterface<IAudioRenderer>(system, params, audren_instance_count++);
611} 658}
612 659
613bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const { 660bool IsFeatureSupported(AudioFeatures feature, u32_le revision) {
614 // Byte swap 661 // Byte swap
615 const u32_be version_num = revision - Common::MakeMagic('R', 'E', 'V', '0'); 662 const u32_be version_num = revision - Common::MakeMagic('R', 'E', 'V', '0');
616 663
617 switch (feature) { 664 switch (feature) {
665 case AudioFeatures::AudioUSBDeviceOutput:
666 return version_num >= 4U;
618 case AudioFeatures::Splitter: 667 case AudioFeatures::Splitter:
619 return version_num >= 2U; 668 return version_num >= 2U;
620 case AudioFeatures::PerformanceMetricsVersion2: 669 case AudioFeatures::PerformanceMetricsVersion2:
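IsFeatureSupported turns a little-endian 'REVx' magic into a plain version number by subtracting the 'REV0' magic and byte-swapping the difference, so 'REV4' maps to 4. A self-contained sketch of that arithmetic (MakeMagic and Swap32 are local helpers standing in for the Common utilities):

#include <cassert>
#include <cstdint>

constexpr std::uint32_t MakeMagic(char a, char b, char c, char d) {
    return static_cast<std::uint32_t>(a) | static_cast<std::uint32_t>(b) << 8 |
           static_cast<std::uint32_t>(c) << 16 | static_cast<std::uint32_t>(d) << 24;
}

constexpr std::uint32_t Swap32(std::uint32_t v) {
    return (v >> 24) | ((v >> 8) & 0x0000FF00U) | ((v << 8) & 0x00FF0000U) | (v << 24);
}

int main() {
    // 'REV4' - 'REV0' differs only in the last character, which sits in the
    // most significant byte of the little-endian word; swapping recovers 4.
    const std::uint32_t revision = MakeMagic('R', 'E', 'V', '4');
    const std::uint32_t version = Swap32(revision - MakeMagic('R', 'E', 'V', '0'));
    assert(version == 4); // version >= 4 gates AudioUSBDeviceOutput above
    return 0;
}

The same value then gates ListAudioDeviceName: only a REV4-or-newer device session reports AudioUsbDeviceOutput.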
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index 1d3c8df61..4e0ccc792 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -6,6 +6,10 @@
6 6
7#include "core/hle/service/service.h" 7#include "core/hle/service/service.h"
8 8
9namespace Core {
10class System;
11}
12
9namespace Kernel { 13namespace Kernel {
10class HLERequestContext; 14class HLERequestContext;
11} 15}
@@ -14,7 +18,7 @@ namespace Service::Audio {
14 18
15class AudRenU final : public ServiceFramework<AudRenU> { 19class AudRenU final : public ServiceFramework<AudRenU> {
16public: 20public:
17 explicit AudRenU(); 21 explicit AudRenU(Core::System& system_);
18 ~AudRenU() override; 22 ~AudRenU() override;
19 23
20private: 24private:
@@ -26,13 +30,19 @@ private:
26 30
27 void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx); 31 void OpenAudioRendererImpl(Kernel::HLERequestContext& ctx);
28 32
29 enum class AudioFeatures : u32 { 33 std::size_t audren_instance_count = 0;
30 Splitter, 34 Core::System& system;
31 PerformanceMetricsVersion2, 35};
32 VariadicCommandBuffer,
33 };
34 36
35 bool IsFeatureSupported(AudioFeatures feature, u32_le revision) const; 37// Describes a particular audio feature that may be supported in a particular revision.
38enum class AudioFeatures : u32 {
39 AudioUSBDeviceOutput,
40 Splitter,
41 PerformanceMetricsVersion2,
42 VariadicCommandBuffer,
36}; 43};
37 44
45// Tests if a particular audio feature is supported with a given audio revision.
46bool IsFeatureSupported(AudioFeatures feature, u32_le revision);
47
38} // namespace Service::Audio 48} // namespace Service::Audio
diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp
index dec541f2e..d1ec12ef9 100644
--- a/src/core/hle/service/friend/friend.cpp
+++ b/src/core/hle/service/friend/friend.cpp
@@ -22,7 +22,7 @@ public:
22 {0, nullptr, "GetCompletionEvent"}, 22 {0, nullptr, "GetCompletionEvent"},
23 {1, nullptr, "Cancel"}, 23 {1, nullptr, "Cancel"},
24 {10100, nullptr, "GetFriendListIds"}, 24 {10100, nullptr, "GetFriendListIds"},
25 {10101, nullptr, "GetFriendList"}, 25 {10101, &IFriendService::GetFriendList, "GetFriendList"},
26 {10102, nullptr, "UpdateFriendInfo"}, 26 {10102, nullptr, "UpdateFriendInfo"},
27 {10110, nullptr, "GetFriendProfileImage"}, 27 {10110, nullptr, "GetFriendProfileImage"},
28 {10200, nullptr, "SendFriendRequestForApplication"}, 28 {10200, nullptr, "SendFriendRequestForApplication"},
@@ -99,6 +99,23 @@ public:
99 } 99 }
100 100
101private: 101private:
102 enum class PresenceFilter : u32 {
103 None = 0,
104 Online = 1,
105 OnlinePlay = 2,
106 OnlineOrOnlinePlay = 3,
107 };
108
109 struct SizedFriendFilter {
110 PresenceFilter presence;
111 u8 is_favorite;
112 u8 same_app;
113 u8 same_app_played;
114 u8 arbitrary_app_played;
115 u64 group_id;
116 };
117 static_assert(sizeof(SizedFriendFilter) == 0x10, "SizedFriendFilter is an invalid size");
118
102 void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) { 119 void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) {
103 // Stub used by Splatoon 2 120 // Stub used by Splatoon 2
104 LOG_WARNING(Service_ACC, "(STUBBED) called"); 121 LOG_WARNING(Service_ACC, "(STUBBED) called");
@@ -112,6 +129,22 @@ private:
112 IPC::ResponseBuilder rb{ctx, 2}; 129 IPC::ResponseBuilder rb{ctx, 2};
113 rb.Push(RESULT_SUCCESS); 130 rb.Push(RESULT_SUCCESS);
114 } 131 }
132
133 void GetFriendList(Kernel::HLERequestContext& ctx) {
134 IPC::RequestParser rp{ctx};
135 const auto friend_offset = rp.Pop<u32>();
136 const auto uuid = rp.PopRaw<Common::UUID>();
137 [[maybe_unused]] const auto filter = rp.PopRaw<SizedFriendFilter>();
138 const auto pid = rp.Pop<u64>();
139 LOG_WARNING(Service_ACC, "(STUBBED) called, offset={}, uuid={}, pid={}", friend_offset,
140 uuid.Format(), pid);
141
142 IPC::ResponseBuilder rb{ctx, 3};
143 rb.Push(RESULT_SUCCESS);
144
145 rb.Push<u32>(0); // Friend count
146 // TODO(ogniK): Return a buffer of u64s which are the "NetworkServiceAccountId"
147 }
115}; 148};
116 149
117class INotificationService final : public ServiceFramework<INotificationService> { 150class INotificationService final : public ServiceFramework<INotificationService> {
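The static_assert above pins SizedFriendFilter at 0x10 bytes, which follows from natural alignment: a u32, four u8s, then a u64 that is already 8-byte aligned, so no padding is inserted. A layout sketch with a mirror struct (illustrative only, kept separate from the service code):

#include <cstddef>
#include <cstdint>

struct FilterMirror {
    std::uint32_t presence;            // offset 0x0 (u32-backed PresenceFilter)
    std::uint8_t is_favorite;          // offset 0x4
    std::uint8_t same_app;             // offset 0x5
    std::uint8_t same_app_played;      // offset 0x6
    std::uint8_t arbitrary_app_played; // offset 0x7
    std::uint64_t group_id;            // offset 0x8, naturally aligned
};
static_assert(sizeof(FilterMirror) == 0x10, "filter should pack to 16 bytes");
static_assert(offsetof(FilterMirror, group_id) == 0x8, "no padding before group_id");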
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp
index b839303ac..8ddad8682 100644
--- a/src/core/hle/service/ldr/ldr.cpp
+++ b/src/core/hle/service/ldr/ldr.cpp
@@ -345,14 +345,16 @@ public:
345 vm_manager 345 vm_manager
346 .MirrorMemory(*map_address, nro_address, nro_size, Kernel::MemoryState::ModuleCode) 346 .MirrorMemory(*map_address, nro_address, nro_size, Kernel::MemoryState::ModuleCode)
347 .IsSuccess()); 347 .IsSuccess());
348 ASSERT(vm_manager.UnmapRange(nro_address, nro_size).IsSuccess()); 348 ASSERT(vm_manager.ReprotectRange(nro_address, nro_size, Kernel::VMAPermission::None)
349 .IsSuccess());
349 350
350 if (bss_size > 0) { 351 if (bss_size > 0) {
351 ASSERT(vm_manager 352 ASSERT(vm_manager
352 .MirrorMemory(*map_address + nro_size, bss_address, bss_size, 353 .MirrorMemory(*map_address + nro_size, bss_address, bss_size,
353 Kernel::MemoryState::ModuleCode) 354 Kernel::MemoryState::ModuleCode)
354 .IsSuccess()); 355 .IsSuccess());
355 ASSERT(vm_manager.UnmapRange(bss_address, bss_size).IsSuccess()); 356 ASSERT(vm_manager.ReprotectRange(bss_address, bss_size, Kernel::VMAPermission::None)
357 .IsSuccess());
356 } 358 }
357 359
358 vm_manager.ReprotectRange(*map_address, header.text_size, 360 vm_manager.ReprotectRange(*map_address, header.text_size,
@@ -364,7 +366,8 @@ public:
364 366
365 Core::System::GetInstance().InvalidateCpuInstructionCaches(); 367 Core::System::GetInstance().InvalidateCpuInstructionCaches();
366 368
367 nro.insert_or_assign(*map_address, NROInfo{hash, nro_size + bss_size}); 369 nro.insert_or_assign(*map_address,
370 NROInfo{hash, nro_address, nro_size, bss_address, bss_size});
368 371
369 IPC::ResponseBuilder rb{ctx, 4}; 372 IPC::ResponseBuilder rb{ctx, 4};
370 rb.Push(RESULT_SUCCESS); 373 rb.Push(RESULT_SUCCESS);
@@ -409,9 +412,23 @@ public:
409 } 412 }
410 413
411 auto& vm_manager = Core::CurrentProcess()->VMManager(); 414 auto& vm_manager = Core::CurrentProcess()->VMManager();
412 const auto& nro_size = iter->second.size; 415 const auto& nro_info = iter->second;
413 416
414 ASSERT(vm_manager.UnmapRange(nro_address, nro_size).IsSuccess()); 417 // Unmap the mirrored memory
418 ASSERT(
419 vm_manager.UnmapRange(nro_address, nro_info.nro_size + nro_info.bss_size).IsSuccess());
420
421 // Reprotect the source memory
422 ASSERT(vm_manager
423 .ReprotectRange(nro_info.nro_address, nro_info.nro_size,
424 Kernel::VMAPermission::ReadWrite)
425 .IsSuccess());
426 if (nro_info.bss_size > 0) {
427 ASSERT(vm_manager
428 .ReprotectRange(nro_info.bss_address, nro_info.bss_size,
429 Kernel::VMAPermission::ReadWrite)
430 .IsSuccess());
431 }
415 432
416 Core::System::GetInstance().InvalidateCpuInstructionCaches(); 433 Core::System::GetInstance().InvalidateCpuInstructionCaches();
417 434
@@ -473,7 +490,10 @@ private:
473 490
474 struct NROInfo { 491 struct NROInfo {
475 SHA256Hash hash; 492 SHA256Hash hash;
476 u64 size; 493 VAddr nro_address;
494 u64 nro_size;
495 VAddr bss_address;
496 u64 bss_size;
477 }; 497 };
478 498
479 bool initialized = false; 499 bool initialized = false;
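The change pairs LoadNro and UnloadNro symmetrically: loading mirrors the NRO (and BSS) into the destination and reprotects the source to None rather than unmapping it; unloading drops the mirror and restores ReadWrite on the source, which is why NROInfo now records the source addresses and sizes. A condensed sketch of the pairing (MemoryOps is a hypothetical interface, not yuzu's VMManager):

#include <cstdint>

using VAddr = std::uint64_t;
enum class Perm { None, ReadWrite };

// Hypothetical interface capturing only the operations used above.
struct MemoryOps {
    virtual void Mirror(VAddr dst, VAddr src, std::uint64_t size) = 0;
    virtual void Unmap(VAddr addr, std::uint64_t size) = 0;
    virtual void Reprotect(VAddr addr, std::uint64_t size, Perm perm) = 0;
    virtual ~MemoryOps() = default;
};

// Load: alias the source into place, then lock the source range out.
void LoadRegion(MemoryOps& mem, VAddr map_addr, VAddr src_addr, std::uint64_t size) {
    mem.Mirror(map_addr, src_addr, size);
    mem.Reprotect(src_addr, size, Perm::None);
}

// Unload: drop the alias, then hand the source back to the application.
void UnloadRegion(MemoryOps& mem, VAddr map_addr, VAddr src_addr, std::uint64_t size) {
    mem.Unmap(map_addr, size);
    mem.Reprotect(src_addr, size, Perm::ReadWrite);
}

Keeping the source mapped (just inaccessible) is what allows UnloadNro to restore it in place instead of remapping it.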
diff --git a/src/core/hle/service/mii/mii.cpp b/src/core/hle/service/mii/mii.cpp
index ce84e25ed..0b3923ad9 100644
--- a/src/core/hle/service/mii/mii.cpp
+++ b/src/core/hle/service/mii/mii.cpp
@@ -48,7 +48,7 @@ public:
48 {19, nullptr, "Export"}, 48 {19, nullptr, "Export"},
49 {20, nullptr, "IsBrokenDatabaseWithClearFlag"}, 49 {20, nullptr, "IsBrokenDatabaseWithClearFlag"},
50 {21, &IDatabaseService::GetIndex, "GetIndex"}, 50 {21, &IDatabaseService::GetIndex, "GetIndex"},
51 {22, nullptr, "SetInterfaceVersion"}, 51 {22, &IDatabaseService::SetInterfaceVersion, "SetInterfaceVersion"},
52 {23, nullptr, "Convert"}, 52 {23, nullptr, "Convert"},
53 }; 53 };
54 // clang-format on 54 // clang-format on
@@ -350,8 +350,22 @@ private:
350 rb.Push(index); 350 rb.Push(index);
351 } 351 }
352 352
353 void SetInterfaceVersion(Kernel::HLERequestContext& ctx) {
354 IPC::RequestParser rp{ctx};
355 current_interface_version = rp.PopRaw<u32>();
356
357 LOG_DEBUG(Service_Mii, "called, interface_version={:08X}", current_interface_version);
358
359 UNIMPLEMENTED_IF(current_interface_version != 1);
360
361 IPC::ResponseBuilder rb{ctx, 2};
362 rb.Push(RESULT_SUCCESS);
363 }
364
353 MiiManager db; 365 MiiManager db;
354 366
367 u32 current_interface_version = 0;
368
355 // Last read offsets of Get functions 369 // Last read offsets of Get functions
356 std::array<u32, 4> offsets{}; 370 std::array<u32, 4> offsets{};
357}; 371};
diff --git a/src/core/hle/service/ns/pl_u.cpp b/src/core/hle/service/ns/pl_u.cpp
index ad176f89d..2a522136d 100644
--- a/src/core/hle/service/ns/pl_u.cpp
+++ b/src/core/hle/service/ns/pl_u.cpp
@@ -77,7 +77,7 @@ enum class LoadState : u32 {
77 Done = 1, 77 Done = 1,
78}; 78};
79 79
80static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& output, 80static void DecryptSharedFont(const std::vector<u32>& input, Kernel::PhysicalMemory& output,
81 std::size_t& offset) { 81 std::size_t& offset) {
82 ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE, 82 ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE,
83 "Shared fonts exceeds 17mb!"); 83 "Shared fonts exceeds 17mb!");
@@ -94,7 +94,7 @@ static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& ou
94 offset += transformed_font.size() * sizeof(u32); 94 offset += transformed_font.size() * sizeof(u32);
95} 95}
96 96
97static void EncryptSharedFont(const std::vector<u8>& input, std::vector<u8>& output, 97static void EncryptSharedFont(const std::vector<u8>& input, Kernel::PhysicalMemory& output,
98 std::size_t& offset) { 98 std::size_t& offset) {
99 ASSERT_MSG(offset + input.size() + 8 < SHARED_FONT_MEM_SIZE, "Shared font data exceeds 17 MiB!"); 99 ASSERT_MSG(offset + input.size() + 8 < SHARED_FONT_MEM_SIZE, "Shared font data exceeds 17 MiB!");
100 const u32 KEY = EXPECTED_MAGIC ^ EXPECTED_RESULT; 100 const u32 KEY = EXPECTED_MAGIC ^ EXPECTED_RESULT;
@@ -121,7 +121,7 @@ struct PL_U::Impl {
121 return shared_font_regions.at(index); 121 return shared_font_regions.at(index);
122 } 122 }
123 123
124 void BuildSharedFontsRawRegions(const std::vector<u8>& input) { 124 void BuildSharedFontsRawRegions(const Kernel::PhysicalMemory& input) {
125 // As we can derive the xor key we can just populate the offsets 125 // As we can derive the xor key we can just populate the offsets
126 // based on the shared memory dump 126 // based on the shared memory dump
127 unsigned cur_offset = 0; 127 unsigned cur_offset = 0;
@@ -144,7 +144,7 @@ struct PL_U::Impl {
144 Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem; 144 Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem;
145 145
146 /// Backing memory for the shared font data 146 /// Backing memory for the shared font data
147 std::shared_ptr<std::vector<u8>> shared_font; 147 std::shared_ptr<Kernel::PhysicalMemory> shared_font;
148 148
149 // Automatically populated based on shared_fonts dump or system archives. 149 // Automatically populated based on shared_fonts dump or system archives.
150 std::vector<FontRegion> shared_font_regions; 150 std::vector<FontRegion> shared_font_regions;
@@ -166,7 +166,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} {
166 // Rebuild shared fonts from data ncas 166 // Rebuild shared fonts from data ncas
167 if (nand->HasEntry(static_cast<u64>(FontArchives::Standard), 167 if (nand->HasEntry(static_cast<u64>(FontArchives::Standard),
168 FileSys::ContentRecordType::Data)) { 168 FileSys::ContentRecordType::Data)) {
169 impl->shared_font = std::make_shared<std::vector<u8>>(SHARED_FONT_MEM_SIZE); 169 impl->shared_font = std::make_shared<Kernel::PhysicalMemory>(SHARED_FONT_MEM_SIZE);
170 for (auto font : SHARED_FONTS) { 170 for (auto font : SHARED_FONTS) {
171 const auto nca = 171 const auto nca =
172 nand->GetEntry(static_cast<u64>(font.first), FileSys::ContentRecordType::Data); 172 nand->GetEntry(static_cast<u64>(font.first), FileSys::ContentRecordType::Data);
@@ -207,7 +207,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} {
207 } 207 }
208 208
209 } else { 209 } else {
210 impl->shared_font = std::make_shared<std::vector<u8>>( 210 impl->shared_font = std::make_shared<Kernel::PhysicalMemory>(
211 SHARED_FONT_MEM_SIZE); // Shared memory needs to always be allocated and a fixed size 211 SHARED_FONT_MEM_SIZE); // Shared memory needs to always be allocated and a fixed size
212 212
213 const std::string user_path = FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir); 213 const std::string user_path = FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir);
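Both font helpers rely on the same property: the transform is a 32-bit XOR, and KEY = EXPECTED_MAGIC ^ EXPECTED_RESULT recovers the key from one known plaintext/ciphertext pair. A minimal sketch of that known-plaintext recovery, with made-up constants in place of the real EXPECTED_MAGIC/EXPECTED_RESULT values:

#include <cassert>
#include <cstdint>

int main() {
    // Illustrative values only; the real constants live in pl_u.cpp.
    const std::uint32_t expected_magic = 0x11223344;  // known plaintext word
    const std::uint32_t expected_result = 0x55667788; // its encrypted form

    // XOR is self-inverse, so one known pair yields the key, and applying
    // the key twice round-trips any word.
    const std::uint32_t key = expected_magic ^ expected_result;
    const std::uint32_t word = 0xDEADBEEF;
    assert(((word ^ key) ^ key) == word);
    return 0;
}

This is also why BuildSharedFontsRawRegions can walk a raw shared-memory dump: deriving the key is enough to locate each region without any secret material.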
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h
index 4f6042b00..5b8248433 100644
--- a/src/core/hle/service/nvdrv/devices/nvdevice.h
+++ b/src/core/hle/service/nvdrv/devices/nvdevice.h
@@ -8,6 +8,11 @@
8#include "common/bit_field.h" 8#include "common/bit_field.h"
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "common/swap.h" 10#include "common/swap.h"
11#include "core/hle/service/nvdrv/nvdata.h"
12
13namespace Core {
14class System;
15}
11 16
12namespace Service::Nvidia::Devices { 17namespace Service::Nvidia::Devices {
13 18
@@ -15,7 +20,7 @@ namespace Service::Nvidia::Devices {
15/// implement the ioctl interface. 20/// implement the ioctl interface.
16class nvdevice { 21class nvdevice {
17public: 22public:
18 nvdevice() = default; 23 explicit nvdevice(Core::System& system) : system{system} {}
19 virtual ~nvdevice() = default; 24 virtual ~nvdevice() = default;
20 union Ioctl { 25 union Ioctl {
21 u32_le raw; 26 u32_le raw;
@@ -33,7 +38,11 @@ public:
33 * @param output A buffer where the output data will be written to. 38 * @param output A buffer where the output data will be written to.
34 * @returns The result code of the ioctl. 39 * @returns The result code of the ioctl.
35 */ 40 */
36 virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) = 0; 41 virtual u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
42 IoctlCtrl& ctrl) = 0;
43
44protected:
45 Core::System& system;
37}; 46};
38 47
39} // namespace Service::Nvidia::Devices 48} // namespace Service::Nvidia::Devices
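Every device ioctl now receives an IoctlCtrl parameter, which (as nvhost_ctrl below shows with ctrl.fresh_call) lets a handler distinguish a first dispatch from a resumption after an event wait. A reduced sketch of a device honoring the new shape (the Sketch types are simplified assumptions, not yuzu's actual declarations):

#include <cstdint>
#include <vector>

struct IoctlCtrlSketch {
    bool fresh_call = true; // first dispatch vs. resumption after a wait
};

struct DeviceSketch {
    virtual std::uint32_t ioctl(std::uint32_t command,
                                const std::vector<std::uint8_t>& input,
                                std::vector<std::uint8_t>& output,
                                IoctlCtrlSketch& ctrl) = 0;
    virtual ~DeviceSketch() = default;
};

struct NullDevice final : DeviceSketch {
    std::uint32_t ioctl(std::uint32_t command, const std::vector<std::uint8_t>& input,
                        std::vector<std::uint8_t>& output, IoctlCtrlSketch& ctrl) override {
        if (!ctrl.fresh_call) {
            return 0; // A deferred request completed; finish it here.
        }
        (void)command; (void)input; (void)output; // normal dispatch would go here
        return 0;
    }
};

Threading Core::System& through the base class at the same time removes the remaining Core::System::GetInstance() calls from the device implementations, as the nvhost_as_gpu and nvdisp hunks below show.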
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 20c7c39aa..926a1285d 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -13,10 +13,12 @@
13 13
14namespace Service::Nvidia::Devices { 14namespace Service::Nvidia::Devices {
15 15
16nvdisp_disp0::nvdisp_disp0(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {} 16nvdisp_disp0::nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
17 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
17nvdisp_disp0 ::~nvdisp_disp0() = default; 18nvdisp_disp0 ::~nvdisp_disp0() = default;
18 19
19u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 20u32 nvdisp_disp0::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
21 IoctlCtrl& ctrl) {
20 UNIMPLEMENTED_MSG("Unimplemented ioctl"); 22 UNIMPLEMENTED_MSG("Unimplemented ioctl");
21 return 0; 23 return 0;
22} 24}
@@ -34,9 +36,8 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
34 addr, offset, width, height, stride, static_cast<PixelFormat>(format), 36 addr, offset, width, height, stride, static_cast<PixelFormat>(format),
35 transform, crop_rect}; 37 transform, crop_rect};
36 38
37 auto& instance = Core::System::GetInstance(); 39 system.GetPerfStats().EndGameFrame();
38 instance.GetPerfStats().EndGameFrame(); 40 system.GPU().SwapBuffers(&framebuffer);
39 instance.GPU().SwapBuffers(framebuffer);
40} 41}
41 42
42} // namespace Service::Nvidia::Devices 43} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index 12f3ef825..e79e490ff 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -17,10 +17,11 @@ class nvmap;
17 17
18class nvdisp_disp0 final : public nvdevice { 18class nvdisp_disp0 final : public nvdevice {
19public: 19public:
20 explicit nvdisp_disp0(std::shared_ptr<nvmap> nvmap_dev); 20 explicit nvdisp_disp0(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
21 ~nvdisp_disp0() override; 21 ~nvdisp_disp0() override;
22 22
23 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 23 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
24 IoctlCtrl& ctrl) override;
24 25
25 /// Performs a screen flip, drawing the buffer pointed to by the handle. 26 /// Performs a screen flip, drawing the buffer pointed to by the handle.
26 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, 27 void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index af62d33d2..24ab3f2e9 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -22,10 +22,12 @@ enum {
22}; 22};
23} 23}
24 24
25nvhost_as_gpu::nvhost_as_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {} 25nvhost_as_gpu::nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
26 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
26nvhost_as_gpu::~nvhost_as_gpu() = default; 27nvhost_as_gpu::~nvhost_as_gpu() = default;
27 28
28u32 nvhost_as_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 29u32 nvhost_as_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
30 IoctlCtrl& ctrl) {
29 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 31 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
30 command.raw, input.size(), output.size()); 32 command.raw, input.size(), output.size());
31 33
@@ -65,7 +67,7 @@ u32 nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<u8>&
65 LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages, 67 LOG_DEBUG(Service_NVDRV, "called, pages={:X}, page_size={:X}, flags={:X}", params.pages,
66 params.page_size, params.flags); 68 params.page_size, params.flags);
67 69
68 auto& gpu = Core::System::GetInstance().GPU(); 70 auto& gpu = system.GPU();
69 const u64 size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)}; 71 const u64 size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)};
70 if (params.flags & 1) { 72 if (params.flags & 1) {
71 params.offset = gpu.MemoryManager().AllocateSpace(params.offset, size, 1); 73 params.offset = gpu.MemoryManager().AllocateSpace(params.offset, size, 1);
@@ -85,7 +87,7 @@ u32 nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& output)
85 std::vector<IoctlRemapEntry> entries(num_entries); 87 std::vector<IoctlRemapEntry> entries(num_entries);
86 std::memcpy(entries.data(), input.data(), input.size()); 88 std::memcpy(entries.data(), input.data(), input.size());
87 89
88 auto& gpu = Core::System::GetInstance().GPU(); 90 auto& gpu = system.GPU();
89 for (const auto& entry : entries) { 91 for (const auto& entry : entries) {
90 LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}", 92 LOG_WARNING(Service_NVDRV, "remap entry, offset=0x{:X} handle=0x{:X} pages=0x{:X}",
91 entry.offset, entry.nvmap_handle, entry.pages); 93 entry.offset, entry.nvmap_handle, entry.pages);
@@ -136,7 +138,7 @@ u32 nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8>& ou
136 // case to prevent unexpected behavior. 138 // case to prevent unexpected behavior.
137 ASSERT(object->id == params.nvmap_handle); 139 ASSERT(object->id == params.nvmap_handle);
138 140
139 auto& gpu = Core::System::GetInstance().GPU(); 141 auto& gpu = system.GPU();
140 142
141 if (params.flags & 1) { 143 if (params.flags & 1) {
142 params.offset = gpu.MemoryManager().MapBufferEx(object->addr, params.offset, object->size); 144 params.offset = gpu.MemoryManager().MapBufferEx(object->addr, params.offset, object->size);
@@ -173,8 +175,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
173 return 0; 175 return 0;
174 } 176 }
175 177
176 params.offset = Core::System::GetInstance().GPU().MemoryManager().UnmapBuffer(params.offset, 178 params.offset = system.GPU().MemoryManager().UnmapBuffer(params.offset, itr->second.size);
177 itr->second.size);
178 buffer_mappings.erase(itr->second.offset); 179 buffer_mappings.erase(itr->second.offset);
179 180
180 std::memcpy(output.data(), &params, output.size()); 181 std::memcpy(output.data(), &params, output.size());
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index eb14b1da8..30ca5f4c3 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -17,10 +17,11 @@ class nvmap;
17 17
18class nvhost_as_gpu final : public nvdevice { 18class nvhost_as_gpu final : public nvdevice {
19public: 19public:
20 explicit nvhost_as_gpu(std::shared_ptr<nvmap> nvmap_dev); 20 explicit nvhost_as_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
21 ~nvhost_as_gpu() override; 21 ~nvhost_as_gpu() override;
22 22
23 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 23 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
24 IoctlCtrl& ctrl) override;
24 25
25private: 26private:
26 enum class IoctlCommand : u32_le { 27 enum class IoctlCommand : u32_le {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index b39fb9ef9..9a66a5f88 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -7,14 +7,20 @@
7 7
8#include "common/assert.h" 8#include "common/assert.h"
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "core/core.h"
11#include "core/hle/kernel/readable_event.h"
12#include "core/hle/kernel/writable_event.h"
10#include "core/hle/service/nvdrv/devices/nvhost_ctrl.h" 13#include "core/hle/service/nvdrv/devices/nvhost_ctrl.h"
14#include "video_core/gpu.h"
11 15
12namespace Service::Nvidia::Devices { 16namespace Service::Nvidia::Devices {
13 17
14nvhost_ctrl::nvhost_ctrl() = default; 18nvhost_ctrl::nvhost_ctrl(Core::System& system, EventInterface& events_interface)
19 : nvdevice(system), events_interface{events_interface} {}
15nvhost_ctrl::~nvhost_ctrl() = default; 20nvhost_ctrl::~nvhost_ctrl() = default;
16 21
17u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 22u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
23 IoctlCtrl& ctrl) {
18 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 24 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
19 command.raw, input.size(), output.size()); 25 command.raw, input.size(), output.size());
20 26
@@ -22,11 +28,15 @@ u32 nvhost_ctrl::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<
22 case IoctlCommand::IocGetConfigCommand: 28 case IoctlCommand::IocGetConfigCommand:
23 return NvOsGetConfigU32(input, output); 29 return NvOsGetConfigU32(input, output);
24 case IoctlCommand::IocCtrlEventWaitCommand: 30 case IoctlCommand::IocCtrlEventWaitCommand:
25 return IocCtrlEventWait(input, output, false); 31 return IocCtrlEventWait(input, output, false, ctrl);
26 case IoctlCommand::IocCtrlEventWaitAsyncCommand: 32 case IoctlCommand::IocCtrlEventWaitAsyncCommand:
27 return IocCtrlEventWait(input, output, true); 33 return IocCtrlEventWait(input, output, true, ctrl);
28 case IoctlCommand::IocCtrlEventRegisterCommand: 34 case IoctlCommand::IocCtrlEventRegisterCommand:
29 return IocCtrlEventRegister(input, output); 35 return IocCtrlEventRegister(input, output);
36 case IoctlCommand::IocCtrlEventUnregisterCommand:
37 return IocCtrlEventUnregister(input, output);
38 case IoctlCommand::IocCtrlEventSignalCommand:
39 return IocCtrlEventSignal(input, output);
30 } 40 }
31 UNIMPLEMENTED_MSG("Unimplemented ioctl"); 41 UNIMPLEMENTED_MSG("Unimplemented ioctl");
32 return 0; 42 return 0;
@@ -41,23 +51,137 @@ u32 nvhost_ctrl::NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>&
41} 51}
42 52
43u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, 53u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output,
44 bool is_async) { 54 bool is_async, IoctlCtrl& ctrl) {
45 IocCtrlEventWaitParams params{}; 55 IocCtrlEventWaitParams params{};
46 std::memcpy(&params, input.data(), sizeof(params)); 56 std::memcpy(&params, input.data(), sizeof(params));
47 LOG_WARNING(Service_NVDRV, 57 LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_async={}",
48 "(STUBBED) called, syncpt_id={}, threshold={}, timeout={}, is_async={}", 58 params.syncpt_id, params.threshold, params.timeout, is_async);
49 params.syncpt_id, params.threshold, params.timeout, is_async);
50 59
51 // TODO(Subv): Implement actual syncpt waiting. 60 if (params.syncpt_id >= MaxSyncPoints) {
52 params.value = 0; 61 return NvResult::BadParameter;
62 }
63
64 auto& gpu = system.GPU();
65 // This is mostly to account for unimplemented features: a synchronous
66 // GPU is always considered synced.
67 if (!gpu.IsAsync()) {
68 return NvResult::Success;
69 }
70 auto lock = gpu.LockSync();
71 const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id);
72 const s32 diff = current_syncpoint_value - params.threshold;
73 if (diff >= 0) {
74 params.value = current_syncpoint_value;
75 std::memcpy(output.data(), &params, sizeof(params));
76 return NvResult::Success;
77 }
78 const u32 target_value = current_syncpoint_value - diff;
79
80 if (!is_async) {
81 params.value = 0;
82 }
83
84 if (params.timeout == 0) {
85 std::memcpy(output.data(), &params, sizeof(params));
86 return NvResult::Timeout;
87 }
88
89 u32 event_id;
90 if (is_async) {
91 event_id = params.value & 0x00FF;
92 if (event_id >= MaxNvEvents) {
93 std::memcpy(output.data(), &params, sizeof(params));
94 return NvResult::BadParameter;
95 }
96 } else {
97 if (ctrl.fresh_call) {
98 const auto result = events_interface.GetFreeEvent();
99 if (result) {
100 event_id = *result;
101 } else {
102 LOG_CRITICAL(Service_NVDRV, "No Free Events available!");
103 event_id = params.value & 0x00FF;
104 }
105 } else {
106 event_id = ctrl.event_id;
107 }
108 }
109
110 EventState status = events_interface.status[event_id];
111 if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) {
112 events_interface.SetEventStatus(event_id, EventState::Waiting);
113 events_interface.assigned_syncpt[event_id] = params.syncpt_id;
114 events_interface.assigned_value[event_id] = target_value;
115 if (is_async) {
116 params.value = params.syncpt_id << 4;
117 } else {
118 params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
119 }
120 params.value |= event_id;
121 events_interface.events[event_id].writable->Clear();
122 gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
123 if (!is_async && ctrl.fresh_call) {
124 ctrl.must_delay = true;
125 ctrl.timeout = params.timeout;
126 ctrl.event_id = event_id;
127 return NvResult::Timeout;
128 }
129 std::memcpy(output.data(), &params, sizeof(params));
130 return NvResult::Timeout;
131 }
53 std::memcpy(output.data(), &params, sizeof(params)); 132 std::memcpy(output.data(), &params, sizeof(params));
54 return 0; 133 return NvResult::BadParameter;
55} 134}
56 135
57u32 nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) { 136u32 nvhost_ctrl::IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output) {
58 LOG_WARNING(Service_NVDRV, "(STUBBED) called"); 137 IocCtrlEventRegisterParams params{};
59 // TODO(bunnei): Implement this. 138 std::memcpy(&params, input.data(), sizeof(params));
60 return 0; 139 const u32 event_id = params.user_event_id & 0x00FF;
140 LOG_DEBUG(Service_NVDRV, " called, user_event_id: {:X}", event_id);
141 if (event_id >= MaxNvEvents) {
142 return NvResult::BadParameter;
143 }
144 if (events_interface.registered[event_id]) {
145 return NvResult::BadParameter;
146 }
147 events_interface.RegisterEvent(event_id);
148 return NvResult::Success;
149}
150
151u32 nvhost_ctrl::IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output) {
152 IocCtrlEventUnregisterParams params{};
153 std::memcpy(&params, input.data(), sizeof(params));
154 const u32 event_id = params.user_event_id & 0x00FF;
155 LOG_DEBUG(Service_NVDRV, " called, user_event_id: {:X}", event_id);
156 if (event_id >= MaxNvEvents) {
157 return NvResult::BadParameter;
158 }
159 if (!events_interface.registered[event_id]) {
160 return NvResult::BadParameter;
161 }
162 events_interface.UnregisterEvent(event_id);
163 return NvResult::Success;
164}
165
166u32 nvhost_ctrl::IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output) {
167 IocCtrlEventSignalParams params{};
168 std::memcpy(&params, input.data(), sizeof(params));
169 // TODO(Blinkhawk): This is normally called when an NvEvent times out on WaitSynchronization.
170 // Reverse engineering suggests it cancels the GPU event, but further research is required.
171 u32 event_id = params.user_event_id & 0x00FF;
172 LOG_WARNING(Service_NVDRV, "(STUBBED) called, user_event_id: {:X}", event_id);
173 if (event_id >= MaxNvEvents) {
174 return NvResult::BadParameter;
175 }
176 if (events_interface.status[event_id] == EventState::Waiting) {
177 auto& gpu = system.GPU();
178 if (gpu.CancelSyncptInterrupt(events_interface.assigned_syncpt[event_id],
179 events_interface.assigned_value[event_id])) {
180 events_interface.LiberateEvent(event_id);
181 events_interface.events[event_id].writable->Signal();
182 }
183 }
184 return NvResult::Success;
61} 185}
62 186
63} // namespace Service::Nvidia::Devices 187} // namespace Service::Nvidia::Devices
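The rewritten IocCtrlEventWait compares the live syncpoint counter against the guest's threshold with a signed subtraction, so the test stays correct even after the unsigned counter wraps. A standalone sketch of just that comparison (values are hypothetical):

    #include <cassert>
    #include <cstdint>

    // True when the syncpoint has already reached the threshold.
    // The signed difference survives u32 wraparound: 0x00000002 - 0xFFFFFFFE
    // wraps to 4, a small positive number, so a counter that overflowed
    // past the threshold still counts as "reached".
    bool SyncpointReached(uint32_t current, uint32_t threshold) {
        const int32_t diff = static_cast<int32_t>(current - threshold);
        return diff >= 0;
    }

    int main() {
        assert(SyncpointReached(10, 5));                     // already signalled
        assert(!SyncpointReached(5, 10));                    // still pending
        assert(SyncpointReached(0x00000002u, 0xFFFFFFFEu));  // wrapped counter
    }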
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index 6d0de2212..14e6e7e57 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -8,15 +8,17 @@
8#include <vector> 8#include <vector>
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "core/hle/service/nvdrv/devices/nvdevice.h" 10#include "core/hle/service/nvdrv/devices/nvdevice.h"
11#include "core/hle/service/nvdrv/nvdrv.h"
11 12
12namespace Service::Nvidia::Devices { 13namespace Service::Nvidia::Devices {
13 14
14class nvhost_ctrl final : public nvdevice { 15class nvhost_ctrl final : public nvdevice {
15public: 16public:
16 nvhost_ctrl(); 17 explicit nvhost_ctrl(Core::System& system, EventInterface& events_interface);
17 ~nvhost_ctrl() override; 18 ~nvhost_ctrl() override;
18 19
19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 20 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
21 IoctlCtrl& ctrl) override;
20 22
21private: 23private:
22 enum class IoctlCommand : u32_le { 24 enum class IoctlCommand : u32_le {
@@ -132,9 +134,16 @@ private:
132 134
133 u32 NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output); 135 u32 NvOsGetConfigU32(const std::vector<u8>& input, std::vector<u8>& output);
134 136
135 u32 IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async); 137 u32 IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& output, bool is_async,
138 IoctlCtrl& ctrl);
136 139
137 u32 IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output); 140 u32 IocCtrlEventRegister(const std::vector<u8>& input, std::vector<u8>& output);
141
142 u32 IocCtrlEventUnregister(const std::vector<u8>& input, std::vector<u8>& output);
143
144 u32 IocCtrlEventSignal(const std::vector<u8>& input, std::vector<u8>& output);
145
146 EventInterface& events_interface;
138}; 147};
139 148
140} // namespace Service::Nvidia::Devices 149} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index 0e28755bd..988effd90 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -12,10 +12,11 @@
12 12
13namespace Service::Nvidia::Devices { 13namespace Service::Nvidia::Devices {
14 14
15nvhost_ctrl_gpu::nvhost_ctrl_gpu() = default; 15nvhost_ctrl_gpu::nvhost_ctrl_gpu(Core::System& system) : nvdevice(system) {}
16nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default; 16nvhost_ctrl_gpu::~nvhost_ctrl_gpu() = default;
17 17
18u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 18u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
19 IoctlCtrl& ctrl) {
19 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 20 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
20 command.raw, input.size(), output.size()); 21 command.raw, input.size(), output.size());
21 22
@@ -185,7 +186,7 @@ u32 nvhost_ctrl_gpu::GetGpuTime(const std::vector<u8>& input, std::vector<u8>& o
185 186
186 IoctlGetGpuTime params{}; 187 IoctlGetGpuTime params{};
187 std::memcpy(&params, input.data(), input.size()); 188 std::memcpy(&params, input.data(), input.size());
188 const auto ns = Core::Timing::CyclesToNs(Core::System::GetInstance().CoreTiming().GetTicks()); 189 const auto ns = Core::Timing::CyclesToNs(system.CoreTiming().GetTicks());
189 params.gpu_time = static_cast<u64_le>(ns.count()); 190 params.gpu_time = static_cast<u64_le>(ns.count());
190 std::memcpy(output.data(), &params, output.size()); 191 std::memcpy(output.data(), &params, output.size());
191 return 0; 192 return 0;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
index 240435eea..2b035ae3f 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices {
13 13
14class nvhost_ctrl_gpu final : public nvdevice { 14class nvhost_ctrl_gpu final : public nvdevice {
15public: 15public:
16 nvhost_ctrl_gpu(); 16 explicit nvhost_ctrl_gpu(Core::System& system);
17 ~nvhost_ctrl_gpu() override; 17 ~nvhost_ctrl_gpu() override;
18 18
19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
20 IoctlCtrl& ctrl) override;
20 21
21private: 22private:
22 enum class IoctlCommand : u32_le { 23 enum class IoctlCommand : u32_le {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 8ce7bc7a5..241dac881 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -13,10 +13,12 @@
13 13
14namespace Service::Nvidia::Devices { 14namespace Service::Nvidia::Devices {
15 15
16nvhost_gpu::nvhost_gpu(std::shared_ptr<nvmap> nvmap_dev) : nvmap_dev(std::move(nvmap_dev)) {} 16nvhost_gpu::nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
17 : nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
17nvhost_gpu::~nvhost_gpu() = default; 18nvhost_gpu::~nvhost_gpu() = default;
18 19
19u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 20u32 nvhost_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
21 IoctlCtrl& ctrl) {
20 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 22 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
21 command.raw, input.size(), output.size()); 23 command.raw, input.size(), output.size());
22 24
@@ -119,8 +121,10 @@ u32 nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8>& ou
119 params.num_entries, params.flags, params.unk0, params.unk1, params.unk2, 121 params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
120 params.unk3); 122 params.unk3);
121 123
122 params.fence_out.id = 0; 124 auto& gpu = system.GPU();
123 params.fence_out.value = 0; 125 params.fence_out.id = assigned_syncpoints;
126 params.fence_out.value = gpu.GetSyncpointValue(assigned_syncpoints);
127 assigned_syncpoints++;
124 std::memcpy(output.data(), &params, output.size()); 128 std::memcpy(output.data(), &params, output.size());
125 return 0; 129 return 0;
126} 130}
@@ -143,7 +147,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
143 IoctlSubmitGpfifo params{}; 147 IoctlSubmitGpfifo params{};
144 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); 148 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
145 LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", 149 LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
146 params.address, params.num_entries, params.flags); 150 params.address, params.num_entries, params.flags.raw);
147 151
148 ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) + 152 ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
149 params.num_entries * sizeof(Tegra::CommandListHeader), 153 params.num_entries * sizeof(Tegra::CommandListHeader),
@@ -153,10 +157,18 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
153 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], 157 std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
154 params.num_entries * sizeof(Tegra::CommandListHeader)); 158 params.num_entries * sizeof(Tegra::CommandListHeader));
155 159
156 Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); 160 UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
161 UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
162
163 auto& gpu = system.GPU();
164 u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
165 if (params.flags.increment.Value()) {
166 params.fence_out.value += current_syncpoint_value;
167 } else {
168 params.fence_out.value = current_syncpoint_value;
169 }
170 gpu.PushGPUEntries(std::move(entries));
157 171
158 params.fence_out.id = 0;
159 params.fence_out.value = 0;
160 std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo)); 172 std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
161 return 0; 173 return 0;
162} 174}
@@ -168,16 +180,24 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
168 IoctlSubmitGpfifo params{}; 180 IoctlSubmitGpfifo params{};
169 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); 181 std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
170 LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", 182 LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
171 params.address, params.num_entries, params.flags); 183 params.address, params.num_entries, params.flags.raw);
172 184
173 Tegra::CommandList entries(params.num_entries); 185 Tegra::CommandList entries(params.num_entries);
174 Memory::ReadBlock(params.address, entries.data(), 186 Memory::ReadBlock(params.address, entries.data(),
175 params.num_entries * sizeof(Tegra::CommandListHeader)); 187 params.num_entries * sizeof(Tegra::CommandListHeader));
176 188
177 Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); 189 UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
190 UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
191
192 auto& gpu = system.GPU();
193 u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
194 if (params.flags.increment.Value()) {
195 params.fence_out.value += current_syncpoint_value;
196 } else {
197 params.fence_out.value = current_syncpoint_value;
198 }
199 gpu.PushGPUEntries(std::move(entries));
178 200
179 params.fence_out.id = 0;
180 params.fence_out.value = 0;
181 std::memcpy(output.data(), &params, output.size()); 201 std::memcpy(output.data(), &params, output.size());
182 return 0; 202 return 0;
183} 203}
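Both SubmitGPFIFO and KickoffPB now fill fence_out from the live syncpoint counter: with the increment flag set, the guest-supplied value is treated as relative and rebased onto the current count; otherwise the current count is returned as-is. A small worked sketch (plain integers stand in for gpu.GetSyncpointValue):

    #include <cstdint>
    #include <iostream>

    struct Fence {
        int32_t id;
        uint32_t value;
    };

    // Mirrors the fence handling above: `increment` means the caller passed
    // a relative value that must be rebased onto the current counter.
    Fence ResolveFenceOut(Fence in, uint32_t current_syncpoint_value, bool increment) {
        if (increment) {
            in.value += current_syncpoint_value; // relative -> absolute target
        } else {
            in.value = current_syncpoint_value;  // just report the current count
        }
        return in;
    }

    int main() {
        const Fence relative{0, 2};
        const Fence absolute = ResolveFenceOut(relative, 40, true);
        std::cout << absolute.value << '\n'; // 42: a target two increments ahead
    }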
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 62beb5c0c..d2e8fbae9 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -10,6 +10,7 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/swap.h" 11#include "common/swap.h"
12#include "core/hle/service/nvdrv/devices/nvdevice.h" 12#include "core/hle/service/nvdrv/devices/nvdevice.h"
13#include "core/hle/service/nvdrv/nvdata.h"
13 14
14namespace Service::Nvidia::Devices { 15namespace Service::Nvidia::Devices {
15 16
@@ -20,10 +21,11 @@ constexpr u32 NVGPU_IOCTL_CHANNEL_KICKOFF_PB(0x1b);
20 21
21class nvhost_gpu final : public nvdevice { 22class nvhost_gpu final : public nvdevice {
22public: 23public:
23 explicit nvhost_gpu(std::shared_ptr<nvmap> nvmap_dev); 24 explicit nvhost_gpu(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
24 ~nvhost_gpu() override; 25 ~nvhost_gpu() override;
25 26
26 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 27 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
28 IoctlCtrl& ctrl) override;
27 29
28private: 30private:
29 enum class IoctlCommand : u32_le { 31 enum class IoctlCommand : u32_le {
@@ -113,11 +115,7 @@ private:
113 static_assert(sizeof(IoctlGetErrorNotification) == 16, 115 static_assert(sizeof(IoctlGetErrorNotification) == 16,
114 "IoctlGetErrorNotification is incorrect size"); 116 "IoctlGetErrorNotification is incorrect size");
115 117
116 struct IoctlFence { 118 static_assert(sizeof(Fence) == 8, "Fence is incorrect size");
117 u32_le id;
118 u32_le value;
119 };
120 static_assert(sizeof(IoctlFence) == 8, "IoctlFence is incorrect size");
121 119
122 struct IoctlAllocGpfifoEx { 120 struct IoctlAllocGpfifoEx {
123 u32_le num_entries; 121 u32_le num_entries;
@@ -132,13 +130,13 @@ private:
132 static_assert(sizeof(IoctlAllocGpfifoEx) == 32, "IoctlAllocGpfifoEx is incorrect size"); 130 static_assert(sizeof(IoctlAllocGpfifoEx) == 32, "IoctlAllocGpfifoEx is incorrect size");
133 131
134 struct IoctlAllocGpfifoEx2 { 132 struct IoctlAllocGpfifoEx2 {
135 u32_le num_entries; // in 133 u32_le num_entries; // in
136 u32_le flags; // in 134 u32_le flags; // in
137 u32_le unk0; // in (1 works) 135 u32_le unk0; // in (1 works)
138 IoctlFence fence_out; // out 136 Fence fence_out; // out
139 u32_le unk1; // in 137 u32_le unk1; // in
140 u32_le unk2; // in 138 u32_le unk2; // in
141 u32_le unk3; // in 139 u32_le unk3; // in
142 }; 140 };
143 static_assert(sizeof(IoctlAllocGpfifoEx2) == 32, "IoctlAllocGpfifoEx2 is incorrect size"); 141 static_assert(sizeof(IoctlAllocGpfifoEx2) == 32, "IoctlAllocGpfifoEx2 is incorrect size");
144 142
@@ -153,10 +151,16 @@ private:
153 struct IoctlSubmitGpfifo { 151 struct IoctlSubmitGpfifo {
154 u64_le address; // pointer to gpfifo entry structs 152 u64_le address; // pointer to gpfifo entry structs
155 u32_le num_entries; // number of fence objects being submitted 153 u32_le num_entries; // number of fence objects being submitted
156 u32_le flags; 154 union {
157 IoctlFence fence_out; // returned new fence object for others to wait on 155 u32_le raw;
158 }; 156 BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list
159 static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(IoctlFence), 157 BitField<1, 1, u32_le> add_increment; // append an increment to the list
158 BitField<2, 1, u32_le> new_hw_format; // Mostly ignored
159 BitField<8, 1, u32_le> increment; // increment the returned fence
160 } flags;
161 Fence fence_out; // returned new fence object for others to wait on
162 };
163 static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence),
160 "IoctlSubmitGpfifo is incorrect size"); 164 "IoctlSubmitGpfifo is incorrect size");
161 165
162 struct IoctlGetWaitbase { 166 struct IoctlGetWaitbase {
@@ -184,6 +188,7 @@ private:
184 u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output); 188 u32 ChannelSetTimeout(const std::vector<u8>& input, std::vector<u8>& output);
185 189
186 std::shared_ptr<nvmap> nvmap_dev; 190 std::shared_ptr<nvmap> nvmap_dev;
191 u32 assigned_syncpoints{};
187}; 192};
188 193
189} // namespace Service::Nvidia::Devices 194} // namespace Service::Nvidia::Devices
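IoctlSubmitGpfifo now decodes its flags word through BitField members rather than a bare u32. The equivalent mask-and-shift reading, spelled out for reference (the layout is taken from the BitField declarations above; the struct name is illustrative):

    #include <cstdint>

    // Hand-written decoding of the same bit layout as the union above.
    struct SubmitGpfifoFlags {
        uint32_t raw;
        bool AddWait() const { return (raw >> 0) & 1; }      // append a wait syncpoint
        bool AddIncrement() const { return (raw >> 1) & 1; } // append an increment
        bool NewHwFormat() const { return (raw >> 2) & 1; }  // mostly ignored
        bool Increment() const { return (raw >> 8) & 1; }    // rebase the returned fence
    };

    int main() {
        const SubmitGpfifoFlags flags{0x100}; // only bit 8 set
        return flags.Increment() ? 0 : 1;     // exits 0: increment requested
    }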
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
index f5e8ea7c3..f572ad30f 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -10,10 +10,11 @@
10 10
11namespace Service::Nvidia::Devices { 11namespace Service::Nvidia::Devices {
12 12
13nvhost_nvdec::nvhost_nvdec() = default; 13nvhost_nvdec::nvhost_nvdec(Core::System& system) : nvdevice(system) {}
14nvhost_nvdec::~nvhost_nvdec() = default; 14nvhost_nvdec::~nvhost_nvdec() = default;
15 15
16u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 16u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
17 IoctlCtrl& ctrl) {
17 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 18 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
18 command.raw, input.size(), output.size()); 19 command.raw, input.size(), output.size());
19 20
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
index 0e7b284f8..2710f0511 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
@@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices {
13 13
14class nvhost_nvdec final : public nvdevice { 14class nvhost_nvdec final : public nvdevice {
15public: 15public:
16 nvhost_nvdec(); 16 explicit nvhost_nvdec(Core::System& system);
17 ~nvhost_nvdec() override; 17 ~nvhost_nvdec() override;
18 18
19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
20 IoctlCtrl& ctrl) override;
20 21
21private: 22private:
22 enum class IoctlCommand : u32_le { 23 enum class IoctlCommand : u32_le {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
index 3e0951ab0..38282956f 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
@@ -10,10 +10,11 @@
10 10
11namespace Service::Nvidia::Devices { 11namespace Service::Nvidia::Devices {
12 12
13nvhost_nvjpg::nvhost_nvjpg() = default; 13nvhost_nvjpg::nvhost_nvjpg(Core::System& system) : nvdevice(system) {}
14nvhost_nvjpg::~nvhost_nvjpg() = default; 14nvhost_nvjpg::~nvhost_nvjpg() = default;
15 15
16u32 nvhost_nvjpg::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 16u32 nvhost_nvjpg::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
17 IoctlCtrl& ctrl) {
17 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 18 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
18 command.raw, input.size(), output.size()); 19 command.raw, input.size(), output.size());
19 20
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
index 89fd5e95e..379766693 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
@@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices {
13 13
14class nvhost_nvjpg final : public nvdevice { 14class nvhost_nvjpg final : public nvdevice {
15public: 15public:
16 nvhost_nvjpg(); 16 explicit nvhost_nvjpg(Core::System& system);
17 ~nvhost_nvjpg() override; 17 ~nvhost_nvjpg() override;
18 18
19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
20 IoctlCtrl& ctrl) override;
20 21
21private: 22private:
22 enum class IoctlCommand : u32_le { 23 enum class IoctlCommand : u32_le {
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
index d544f0f31..70e8091db 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -10,10 +10,11 @@
10 10
11namespace Service::Nvidia::Devices { 11namespace Service::Nvidia::Devices {
12 12
13nvhost_vic::nvhost_vic() = default; 13nvhost_vic::nvhost_vic(Core::System& system) : nvdevice(system) {}
14nvhost_vic::~nvhost_vic() = default; 14nvhost_vic::~nvhost_vic() = default;
15 15
16u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 16u32 nvhost_vic::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
17 IoctlCtrl& ctrl) {
17 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", 18 LOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}",
18 command.raw, input.size(), output.size()); 19 command.raw, input.size(), output.size());
19 20
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
index fc24c3f9c..7d111977e 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
@@ -13,10 +13,11 @@ namespace Service::Nvidia::Devices {
13 13
14class nvhost_vic final : public nvdevice { 14class nvhost_vic final : public nvdevice {
15public: 15public:
16 nvhost_vic(); 16 explicit nvhost_vic(Core::System& system);
17 ~nvhost_vic() override; 17 ~nvhost_vic() override;
18 18
19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 19 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
20 IoctlCtrl& ctrl) override;
20 21
21private: 22private:
22 enum class IoctlCommand : u32_le { 23 enum class IoctlCommand : u32_le {
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 1ec796fc6..223b496b7 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -18,7 +18,7 @@ enum {
18}; 18};
19} 19}
20 20
21nvmap::nvmap() = default; 21nvmap::nvmap(Core::System& system) : nvdevice(system) {}
22nvmap::~nvmap() = default; 22nvmap::~nvmap() = default;
23 23
24VAddr nvmap::GetObjectAddress(u32 handle) const { 24VAddr nvmap::GetObjectAddress(u32 handle) const {
@@ -28,7 +28,8 @@ VAddr nvmap::GetObjectAddress(u32 handle) const {
28 return object->addr; 28 return object->addr;
29} 29}
30 30
31u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { 31u32 nvmap::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
32 IoctlCtrl& ctrl) {
32 switch (static_cast<IoctlCommand>(command.raw)) { 33 switch (static_cast<IoctlCommand>(command.raw)) {
33 case IoctlCommand::Create: 34 case IoctlCommand::Create:
34 return IocCreate(input, output); 35 return IocCreate(input, output);
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h
index 396230c19..bf4a101c2 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.h
+++ b/src/core/hle/service/nvdrv/devices/nvmap.h
@@ -16,13 +16,14 @@ namespace Service::Nvidia::Devices {
16 16
17class nvmap final : public nvdevice { 17class nvmap final : public nvdevice {
18public: 18public:
19 nvmap(); 19 explicit nvmap(Core::System& system);
20 ~nvmap() override; 20 ~nvmap() override;
21 21
22 /// Returns the allocated address of an nvmap object given its handle. 22 /// Returns the allocated address of an nvmap object given its handle.
23 VAddr GetObjectAddress(u32 handle) const; 23 VAddr GetObjectAddress(u32 handle) const;
24 24
25 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; 25 u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output,
26 IoctlCtrl& ctrl) override;
26 27
27 /// Represents an nvmap object. 28 /// Represents an nvmap object.
28 struct Object { 29 struct Object {
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp
index b60fc748b..d5be64ed2 100644
--- a/src/core/hle/service/nvdrv/interface.cpp
+++ b/src/core/hle/service/nvdrv/interface.cpp
@@ -8,12 +8,18 @@
8#include "core/hle/ipc_helpers.h" 8#include "core/hle/ipc_helpers.h"
9#include "core/hle/kernel/kernel.h" 9#include "core/hle/kernel/kernel.h"
10#include "core/hle/kernel/readable_event.h" 10#include "core/hle/kernel/readable_event.h"
11#include "core/hle/kernel/thread.h"
11#include "core/hle/kernel/writable_event.h" 12#include "core/hle/kernel/writable_event.h"
12#include "core/hle/service/nvdrv/interface.h" 13#include "core/hle/service/nvdrv/interface.h"
14#include "core/hle/service/nvdrv/nvdata.h"
13#include "core/hle/service/nvdrv/nvdrv.h" 15#include "core/hle/service/nvdrv/nvdrv.h"
14 16
15namespace Service::Nvidia { 17namespace Service::Nvidia {
16 18
19void NVDRV::SignalGPUInterruptSyncpt(const u32 syncpoint_id, const u32 value) {
20 nvdrv->SignalSyncpt(syncpoint_id, value);
21}
22
17void NVDRV::Open(Kernel::HLERequestContext& ctx) { 23void NVDRV::Open(Kernel::HLERequestContext& ctx) {
18 LOG_DEBUG(Service_NVDRV, "called"); 24 LOG_DEBUG(Service_NVDRV, "called");
19 25
@@ -36,11 +42,31 @@ void NVDRV::Ioctl(Kernel::HLERequestContext& ctx) {
36 42
37 std::vector<u8> output(ctx.GetWriteBufferSize()); 43 std::vector<u8> output(ctx.GetWriteBufferSize());
38 44
45 IoctlCtrl ctrl{};
46
47 u32 result = nvdrv->Ioctl(fd, command, ctx.ReadBuffer(), output, ctrl);
48
49 if (ctrl.must_delay) {
50 ctrl.fresh_call = false;
51 ctx.SleepClientThread(
52 "NVServices::DelayedResponse", ctrl.timeout,
53 [=](Kernel::SharedPtr<Kernel::Thread> thread, Kernel::HLERequestContext& ctx,
54 Kernel::ThreadWakeupReason reason) {
55 IoctlCtrl ctrl2{ctrl};
56 std::vector<u8> output2 = output;
57 u32 result = nvdrv->Ioctl(fd, command, ctx.ReadBuffer(), output2, ctrl2);
58 ctx.WriteBuffer(output2);
59 IPC::ResponseBuilder rb{ctx, 3};
60 rb.Push(RESULT_SUCCESS);
61 rb.Push(result);
62 },
63 nvdrv->GetEventWriteable(ctrl.event_id));
64 } else {
65 ctx.WriteBuffer(output);
66 }
39 IPC::ResponseBuilder rb{ctx, 3}; 67 IPC::ResponseBuilder rb{ctx, 3};
40 rb.Push(RESULT_SUCCESS); 68 rb.Push(RESULT_SUCCESS);
41 rb.Push(nvdrv->Ioctl(fd, command, ctx.ReadBuffer(), output)); 69 rb.Push(result);
42
43 ctx.WriteBuffer(output);
44} 70}
45 71
46void NVDRV::Close(Kernel::HLERequestContext& ctx) { 72void NVDRV::Close(Kernel::HLERequestContext& ctx) {
@@ -66,13 +92,19 @@ void NVDRV::Initialize(Kernel::HLERequestContext& ctx) {
66void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) { 92void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) {
67 IPC::RequestParser rp{ctx}; 93 IPC::RequestParser rp{ctx};
68 u32 fd = rp.Pop<u32>(); 94 u32 fd = rp.Pop<u32>();
69 u32 event_id = rp.Pop<u32>(); 95 // TODO(Blinkhawk): Figure the meaning of the flag at bit 16
69 u32 event_id = rp.Pop<u32>(); 95 // TODO(Blinkhawk): Figure out the meaning of the flag at bit 16
70 LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}, event_id={:X}", fd, event_id); 97 LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}, event_id={:X}", fd, event_id);
71 98
72 IPC::ResponseBuilder rb{ctx, 3, 1}; 99 IPC::ResponseBuilder rb{ctx, 3, 1};
73 rb.Push(RESULT_SUCCESS); 100 rb.Push(RESULT_SUCCESS);
74 rb.PushCopyObjects(query_event.readable); 101 if (event_id < MaxNvEvents) {
75 rb.Push<u32>(0); 102 rb.PushCopyObjects(nvdrv->GetEvent(event_id));
103 rb.Push<u32>(NvResult::Success);
104 } else {
105 rb.Push<u32>(0);
106 rb.Push<u32>(NvResult::BadParameter);
107 }
76} 108}
77 109
78void NVDRV::SetClientPID(Kernel::HLERequestContext& ctx) { 110void NVDRV::SetClientPID(Kernel::HLERequestContext& ctx) {
@@ -127,10 +159,6 @@ NVDRV::NVDRV(std::shared_ptr<Module> nvdrv, const char* name)
127 {13, &NVDRV::FinishInitialize, "FinishInitialize"}, 159 {13, &NVDRV::FinishInitialize, "FinishInitialize"},
128 }; 160 };
129 RegisterHandlers(functions); 161 RegisterHandlers(functions);
130
131 auto& kernel = Core::System::GetInstance().Kernel();
132 query_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Automatic,
133 "NVDRV::query_event");
134} 162}
135 163
136NVDRV::~NVDRV() = default; 164NVDRV::~NVDRV() = default;
diff --git a/src/core/hle/service/nvdrv/interface.h b/src/core/hle/service/nvdrv/interface.h
index 5b4889910..10a0ecd52 100644
--- a/src/core/hle/service/nvdrv/interface.h
+++ b/src/core/hle/service/nvdrv/interface.h
@@ -19,6 +19,8 @@ public:
19 NVDRV(std::shared_ptr<Module> nvdrv, const char* name); 19 NVDRV(std::shared_ptr<Module> nvdrv, const char* name);
20 ~NVDRV() override; 20 ~NVDRV() override;
21 21
22 void SignalGPUInterruptSyncpt(const u32 syncpoint_id, const u32 value);
23
22private: 24private:
23 void Open(Kernel::HLERequestContext& ctx); 25 void Open(Kernel::HLERequestContext& ctx);
24 void Ioctl(Kernel::HLERequestContext& ctx); 26 void Ioctl(Kernel::HLERequestContext& ctx);
@@ -33,8 +35,6 @@ private:
33 std::shared_ptr<Module> nvdrv; 35 std::shared_ptr<Module> nvdrv;
34 36
35 u64 pid{}; 37 u64 pid{};
36
37 Kernel::EventPair query_event;
38}; 38};
39 39
40} // namespace Service::Nvidia 40} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvdrv/nvdata.h b/src/core/hle/service/nvdrv/nvdata.h
new file mode 100644
index 000000000..ac03cbc23
--- /dev/null
+++ b/src/core/hle/service/nvdrv/nvdata.h
@@ -0,0 +1,48 @@
1#pragma once
2
3#include <array>
4#include "common/common_types.h"
5
6namespace Service::Nvidia {
7
8constexpr u32 MaxSyncPoints = 192;
9constexpr u32 MaxNvEvents = 64;
10
11struct Fence {
12 s32 id;
13 u32 value;
14};
15
16static_assert(sizeof(Fence) == 8, "Fence has wrong size");
17
18struct MultiFence {
19 u32 num_fences;
20 std::array<Fence, 4> fences;
21};
22
23enum NvResult : u32 {
24 Success = 0,
25 BadParameter = 4,
26 Timeout = 5,
27 ResourceError = 15,
28};
29
30enum class EventState {
31 Free = 0,
32 Registered = 1,
33 Waiting = 2,
34 Busy = 3,
35};
36
37struct IoctlCtrl {
38 // True on the first call to the service, for services that call themselves again after a call.
39 bool fresh_call{true};
40 // Tells the ioctl wrapper that it must delay the IPC response and put the thread to sleep
41 bool must_delay{};
42 // Timeout for the delay
43 s64 timeout{};
44 // NV Event Id
45 s32 event_id{-1};
46};
47
48} // namespace Service::Nvidia
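IoctlCtrl is the handshake between a device handler and the IPC wrapper in interface.cpp: on the first pass a handler may set must_delay, the wrapper then parks the guest thread, and on wake-up the same ioctl is replayed with fresh_call cleared. A simplified model of the two-phase flow (FakeEventWait is a made-up handler, not a yuzu function):

    #include <iostream>

    struct IoctlCtrl {
        bool fresh_call{true};
        bool must_delay{};
        long timeout{};
        int event_id{-1};
    };

    // First pass: not ready, ask the wrapper to delay. Replay: succeed.
    int FakeEventWait(IoctlCtrl& ctrl) {
        if (ctrl.fresh_call) {
            ctrl.must_delay = true;
            ctrl.timeout = 1000;
            ctrl.event_id = 7;
            return 5; // NvResult::Timeout, provisional answer
        }
        return 0;     // NvResult::Success on the replay
    }

    int main() {
        IoctlCtrl ctrl{};
        int result = FakeEventWait(ctrl);
        if (ctrl.must_delay) {
            ctrl.fresh_call = false;      // the wrapper clears this before replaying
            ctrl.must_delay = false;
            result = FakeEventWait(ctrl); // replayed once the event fires
        }
        std::cout << result << '\n';      // prints 0
    }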
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 6e4b8f2c6..2011a226a 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -4,7 +4,10 @@
4 4
5#include <utility> 5#include <utility>
6 6
7#include <fmt/format.h>
7#include "core/hle/ipc_helpers.h" 8#include "core/hle/ipc_helpers.h"
9#include "core/hle/kernel/readable_event.h"
10#include "core/hle/kernel/writable_event.h"
8#include "core/hle/service/nvdrv/devices/nvdevice.h" 11#include "core/hle/service/nvdrv/devices/nvdevice.h"
9#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" 12#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
10#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" 13#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
@@ -22,8 +25,9 @@
22 25
23namespace Service::Nvidia { 26namespace Service::Nvidia {
24 27
25void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger) { 28void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger,
26 auto module_ = std::make_shared<Module>(); 29 Core::System& system) {
30 auto module_ = std::make_shared<Module>(system);
27 std::make_shared<NVDRV>(module_, "nvdrv")->InstallAsService(service_manager); 31 std::make_shared<NVDRV>(module_, "nvdrv")->InstallAsService(service_manager);
28 std::make_shared<NVDRV>(module_, "nvdrv:a")->InstallAsService(service_manager); 32 std::make_shared<NVDRV>(module_, "nvdrv:a")->InstallAsService(service_manager);
29 std::make_shared<NVDRV>(module_, "nvdrv:s")->InstallAsService(service_manager); 33 std::make_shared<NVDRV>(module_, "nvdrv:s")->InstallAsService(service_manager);
@@ -32,17 +36,25 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger
32 nvflinger.SetNVDrvInstance(module_); 36 nvflinger.SetNVDrvInstance(module_);
33} 37}
34 38
35Module::Module() { 39Module::Module(Core::System& system) {
36 auto nvmap_dev = std::make_shared<Devices::nvmap>(); 40 auto& kernel = system.Kernel();
37 devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(nvmap_dev); 41 for (u32 i = 0; i < MaxNvEvents; i++) {
38 devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(nvmap_dev); 42 std::string event_label = fmt::format("NVDRV::NvEvent_{}", i);
39 devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(); 43 events_interface.events[i] = Kernel::WritableEvent::CreateEventPair(
44 kernel, Kernel::ResetType::Automatic, event_label);
45 events_interface.status[i] = EventState::Free;
46 events_interface.registered[i] = false;
47 }
48 auto nvmap_dev = std::make_shared<Devices::nvmap>(system);
49 devices["/dev/nvhost-as-gpu"] = std::make_shared<Devices::nvhost_as_gpu>(system, nvmap_dev);
50 devices["/dev/nvhost-gpu"] = std::make_shared<Devices::nvhost_gpu>(system, nvmap_dev);
51 devices["/dev/nvhost-ctrl-gpu"] = std::make_shared<Devices::nvhost_ctrl_gpu>(system);
40 devices["/dev/nvmap"] = nvmap_dev; 52 devices["/dev/nvmap"] = nvmap_dev;
41 devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(nvmap_dev); 53 devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
42 devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(); 54 devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(system, events_interface);
43 devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(); 55 devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system);
44 devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(); 56 devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
45 devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(); 57 devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system);
46} 58}
47 59
48Module::~Module() = default; 60Module::~Module() = default;
@@ -59,12 +71,13 @@ u32 Module::Open(const std::string& device_name) {
59 return fd; 71 return fd;
60} 72}
61 73
62u32 Module::Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output) { 74u32 Module::Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output,
75 IoctlCtrl& ctrl) {
63 auto itr = open_files.find(fd); 76 auto itr = open_files.find(fd);
64 ASSERT_MSG(itr != open_files.end(), "Tried to talk to an invalid device"); 77 ASSERT_MSG(itr != open_files.end(), "Tried to talk to an invalid device");
65 78
66 auto& device = itr->second; 79 auto& device = itr->second;
67 return device->ioctl({command}, input, output); 80 return device->ioctl({command}, input, output, ctrl);
68} 81}
69 82
70ResultCode Module::Close(u32 fd) { 83ResultCode Module::Close(u32 fd) {
@@ -77,4 +90,22 @@ ResultCode Module::Close(u32 fd) {
77 return RESULT_SUCCESS; 90 return RESULT_SUCCESS;
78} 91}
79 92
93void Module::SignalSyncpt(const u32 syncpoint_id, const u32 value) {
94 for (u32 i = 0; i < MaxNvEvents; i++) {
95 if (events_interface.assigned_syncpt[i] == syncpoint_id &&
96 events_interface.assigned_value[i] == value) {
97 events_interface.LiberateEvent(i);
98 events_interface.events[i].writable->Signal();
99 }
100 }
101}
102
103Kernel::SharedPtr<Kernel::ReadableEvent> Module::GetEvent(const u32 event_id) const {
104 return events_interface.events[event_id].readable;
105}
106
107Kernel::SharedPtr<Kernel::WritableEvent> Module::GetEventWriteable(const u32 event_id) const {
108 return events_interface.events[event_id].writable;
109}
110
80} // namespace Service::Nvidia 111} // namespace Service::Nvidia
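Module::SignalSyncpt is the GPU-to-service edge: when the interrupt for a (syncpoint, value) pair fires, every event armed with exactly that pair is liberated and its kernel event signalled. A reduced sketch of the matching loop (plain bools stand in for the kernel event pairs):

    #include <array>
    #include <cstdint>

    constexpr uint32_t MaxNvEvents = 64;

    struct Events {
        std::array<uint32_t, MaxNvEvents> assigned_syncpt{};
        std::array<uint32_t, MaxNvEvents> assigned_value{};
        std::array<bool, MaxNvEvents> signalled{};
    };

    // Wake every event waiting on exactly this (syncpoint, value) pair.
    void SignalSyncpt(Events& ev, uint32_t syncpoint_id, uint32_t value) {
        for (uint32_t i = 0; i < MaxNvEvents; ++i) {
            if (ev.assigned_syncpt[i] == syncpoint_id && ev.assigned_value[i] == value) {
                ev.signalled[i] = true; // the real code also frees the slot
            }
        }
    }

    int main() {
        Events ev;
        ev.assigned_syncpt[3] = 7;
        ev.assigned_value[3] = 42;
        SignalSyncpt(ev, 7, 42);
        return ev.signalled[3] ? 0 : 1; // exits 0: event 3 was woken
    }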
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index 53564f696..a339ab672 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -8,8 +8,14 @@
8#include <unordered_map> 8#include <unordered_map>
9#include <vector> 9#include <vector>
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "core/hle/kernel/writable_event.h"
12#include "core/hle/service/nvdrv/nvdata.h"
11#include "core/hle/service/service.h" 13#include "core/hle/service/service.h"
12 14
15namespace Core {
16class System;
17}
18
13namespace Service::NVFlinger { 19namespace Service::NVFlinger {
14class NVFlinger; 20class NVFlinger;
15} 21}
@@ -20,16 +26,72 @@ namespace Devices {
20class nvdevice; 26class nvdevice;
21} 27}
22 28
23struct IoctlFence { 29struct EventInterface {
24 u32 id; 30 // Mask representing currently busy events
25 u32 value; 31 u64 events_mask{};
32 // Each kernel event associated with an NV event
33 std::array<Kernel::EventPair, MaxNvEvents> events;
34 // The status of each NVEvent
35 std::array<EventState, MaxNvEvents> status{};
36 // Tells if an NVEvent is registered or not
37 std::array<bool, MaxNvEvents> registered{};
38 // When an NVEvent is waiting on a GPU interrupt, this is the syncpoint
39 // associated with it.
40 std::array<u32, MaxNvEvents> assigned_syncpt{};
41 // The value of the GPU interrupt for which the NVEvent is
42 // waiting.
43 std::array<u32, MaxNvEvents> assigned_value{};
44 // Constant to denote an unassigned syncpoint.
45 static constexpr u32 unassigned_syncpt = 0xFFFFFFFF;
46 std::optional<u32> GetFreeEvent() const {
47 u64 mask = events_mask;
48 for (u32 i = 0; i < MaxNvEvents; i++) {
49 const bool is_free = (mask & 0x1) == 0;
50 if (is_free) {
51 if (status[i] == EventState::Registered || status[i] == EventState::Free) {
52 return {i};
53 }
54 }
55 mask = mask >> 1;
56 }
57 return {};
58 }
59 void SetEventStatus(const u32 event_id, EventState new_status) {
60 EventState old_status = status[event_id];
61 if (old_status == new_status) {
62 return;
63 }
64 status[event_id] = new_status;
65 if (new_status == EventState::Registered) {
66 registered[event_id] = true;
67 }
68 if (new_status == EventState::Waiting || new_status == EventState::Busy) {
69 events_mask |= (1ULL << event_id);
70 }
71 }
72 void RegisterEvent(const u32 event_id) {
73 registered[event_id] = true;
74 if (status[event_id] == EventState::Free) {
75 status[event_id] = EventState::Registered;
76 }
77 }
78 void UnregisterEvent(const u32 event_id) {
79 registered[event_id] = false;
80 if (status[event_id] == EventState::Registered) {
81 status[event_id] = EventState::Free;
82 }
83 }
84 void LiberateEvent(const u32 event_id) {
85 status[event_id] = registered[event_id] ? EventState::Registered : EventState::Free;
86 events_mask &= ~(1ULL << event_id);
87 assigned_syncpt[event_id] = unassigned_syncpt;
88 assigned_value[event_id] = 0;
89 }
26}; 90};
27 91
28static_assert(sizeof(IoctlFence) == 8, "IoctlFence has wrong size");
29
30class Module final { 92class Module final {
31public: 93public:
32 Module(); 94 Module(Core::System& system);
33 ~Module(); 95 ~Module();
34 96
35 /// Returns a pointer to one of the available devices, identified by its name. 97 /// Returns a pointer to one of the available devices, identified by its name.
@@ -44,10 +106,17 @@ public:
44 /// Opens a device node and returns a file descriptor to it. 106 /// Opens a device node and returns a file descriptor to it.
45 u32 Open(const std::string& device_name); 107 u32 Open(const std::string& device_name);
46 /// Sends an ioctl command to the specified file descriptor. 108 /// Sends an ioctl command to the specified file descriptor.
47 u32 Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output); 109 u32 Ioctl(u32 fd, u32 command, const std::vector<u8>& input, std::vector<u8>& output,
110 IoctlCtrl& ctrl);
48 /// Closes a device file descriptor and returns operation success. 111 /// Closes a device file descriptor and returns operation success.
49 ResultCode Close(u32 fd); 112 ResultCode Close(u32 fd);
50 113
114 void SignalSyncpt(const u32 syncpoint_id, const u32 value);
115
116 Kernel::SharedPtr<Kernel::ReadableEvent> GetEvent(u32 event_id) const;
117
118 Kernel::SharedPtr<Kernel::WritableEvent> GetEventWriteable(u32 event_id) const;
119
51private: 120private:
52 /// Id to use for the next open file descriptor. 121 /// Id to use for the next open file descriptor.
53 u32 next_fd = 1; 122 u32 next_fd = 1;
@@ -57,9 +126,12 @@ private:
57 126
58 /// Mapping of device node names to their implementation. 127 /// Mapping of device node names to their implementation.
59 std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices; 128 std::unordered_map<std::string, std::shared_ptr<Devices::nvdevice>> devices;
129
130 EventInterface events_interface;
60}; 131};
61 132
62/// Registers all NVDRV services with the specified service manager. 133/// Registers all NVDRV services with the specified service manager.
63void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger); 134void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger& nvflinger,
135 Core::System& system);
64 136
65} // namespace Service::Nvidia 137} // namespace Service::Nvidia
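GetFreeEvent scans events_mask for a clear bit whose slot is also Free or merely Registered; the mask tracks Waiting and Busy slots, set in SetEventStatus and cleared again in LiberateEvent. A standalone version of the bit scan (the status check is folded into the mask here for brevity):

    #include <cstdint>
    #include <optional>

    constexpr uint32_t MaxNvEvents = 64;

    // Lowest event slot whose bit is clear in the busy mask, if any.
    std::optional<uint32_t> GetFreeEvent(uint64_t events_mask) {
        for (uint32_t i = 0; i < MaxNvEvents; ++i) {
            if (((events_mask >> i) & 1) == 0) {
                return i;
            }
        }
        return std::nullopt;
    }

    int main() {
        // Slots 0 and 1 are waiting on interrupts, so slot 2 is handed out.
        return GetFreeEvent(0b011).value() == 2 ? 0 : 1;
    }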
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index 5731e815f..e1a07d3ee 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -34,7 +34,8 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
34 buffer_wait_event.writable->Signal(); 34 buffer_wait_event.writable->Signal();
35} 35}
36 36
37std::optional<u32> BufferQueue::DequeueBuffer(u32 width, u32 height) { 37std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width,
38 u32 height) {
38 auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) { 39 auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) {
39 // Only consider free buffers. Buffers become free once again after they've been Acquired 40 // Only consider free buffers. Buffers become free once again after they've been Acquired
40 // and Released by the compositor, see the NVFlinger::Compose method. 41 // and Released by the compositor, see the NVFlinger::Compose method.
@@ -51,7 +52,7 @@ std::optional<u32> BufferQueue::DequeueBuffer(u32 width, u32 height) {
51 } 52 }
52 53
53 itr->status = Buffer::Status::Dequeued; 54 itr->status = Buffer::Status::Dequeued;
54 return itr->slot; 55 return {{itr->slot, &itr->multi_fence}};
55} 56}
56 57
57const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const { 58const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
@@ -63,7 +64,8 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
63} 64}
64 65
65void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, 66void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
66 const Common::Rectangle<int>& crop_rect) { 67 const Common::Rectangle<int>& crop_rect, u32 swap_interval,
68 Service::Nvidia::MultiFence& multi_fence) {
67 auto itr = std::find_if(queue.begin(), queue.end(), 69 auto itr = std::find_if(queue.begin(), queue.end(),
68 [&](const Buffer& buffer) { return buffer.slot == slot; }); 70 [&](const Buffer& buffer) { return buffer.slot == slot; });
69 ASSERT(itr != queue.end()); 71 ASSERT(itr != queue.end());
@@ -71,12 +73,21 @@ void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
71 itr->status = Buffer::Status::Queued; 73 itr->status = Buffer::Status::Queued;
72 itr->transform = transform; 74 itr->transform = transform;
73 itr->crop_rect = crop_rect; 75 itr->crop_rect = crop_rect;
76 itr->swap_interval = swap_interval;
77 itr->multi_fence = multi_fence;
78 queue_sequence.push_back(slot);
74} 79}
75 80
76std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() { 81std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() {
77 auto itr = std::find_if(queue.begin(), queue.end(), [](const Buffer& buffer) { 82 auto itr = queue.end();
78 return buffer.status == Buffer::Status::Queued; 83 // Iterate to find a queued buffer matching the requested slot.
79 }); 84 while (itr == queue.end() && !queue_sequence.empty()) {
85 u32 slot = queue_sequence.front();
86 itr = std::find_if(queue.begin(), queue.end(), [&slot](const Buffer& buffer) {
87 return buffer.status == Buffer::Status::Queued && buffer.slot == slot;
88 });
89 queue_sequence.pop_front();
90 }
80 if (itr == queue.end()) 91 if (itr == queue.end())
81 return {}; 92 return {};
82 itr->status = Buffer::Status::Acquired; 93 itr->status = Buffer::Status::Acquired;
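AcquireBuffer no longer grabs whichever buffer happens to be queued: queue_sequence records the order in which slots were queued, so the compositor acquires frames first-in, first-out. A reduced model of that ordering (SlotFifo is an illustrative name):

    #include <cstdint>
    #include <list>
    #include <optional>

    // Queue slots in submission order; hand them back FIFO on acquire.
    class SlotFifo {
    public:
        void Queue(uint32_t slot) {
            sequence.push_back(slot);
        }
        std::optional<uint32_t> Acquire() {
            if (sequence.empty()) {
                return std::nullopt;
            }
            const uint32_t slot = sequence.front();
            sequence.pop_front();
            return slot;
        }
    private:
        std::list<uint32_t> sequence;
    };

    int main() {
        SlotFifo fifo;
        fifo.Queue(3);
        fifo.Queue(1);
        return fifo.Acquire().value() == 3 ? 0 : 1; // FIFO: slot 3 comes out first
    }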
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index e1ccb6171..356bedb81 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <list>
7#include <optional> 8#include <optional>
8#include <vector> 9#include <vector>
9 10
@@ -12,6 +13,7 @@
12#include "common/swap.h" 13#include "common/swap.h"
13#include "core/hle/kernel/object.h" 14#include "core/hle/kernel/object.h"
14#include "core/hle/kernel/writable_event.h" 15#include "core/hle/kernel/writable_event.h"
16#include "core/hle/service/nvdrv/nvdata.h"
15 17
16namespace Service::NVFlinger { 18namespace Service::NVFlinger {
17 19
@@ -68,13 +70,17 @@ public:
68 IGBPBuffer igbp_buffer; 70 IGBPBuffer igbp_buffer;
69 BufferTransformFlags transform; 71 BufferTransformFlags transform;
70 Common::Rectangle<int> crop_rect; 72 Common::Rectangle<int> crop_rect;
73 u32 swap_interval;
74 Service::Nvidia::MultiFence multi_fence;
71 }; 75 };
72 76
73 void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer); 77 void SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer);
74 std::optional<u32> DequeueBuffer(u32 width, u32 height); 78 std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> DequeueBuffer(u32 width,
79 u32 height);
75 const IGBPBuffer& RequestBuffer(u32 slot) const; 80 const IGBPBuffer& RequestBuffer(u32 slot) const;
76 void QueueBuffer(u32 slot, BufferTransformFlags transform, 81 void QueueBuffer(u32 slot, BufferTransformFlags transform,
77 const Common::Rectangle<int>& crop_rect); 82 const Common::Rectangle<int>& crop_rect, u32 swap_interval,
83 Service::Nvidia::MultiFence& multi_fence);
78 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer(); 84 std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
79 void ReleaseBuffer(u32 slot); 85 void ReleaseBuffer(u32 slot);
80 u32 Query(QueryType type); 86 u32 Query(QueryType type);
@@ -92,6 +98,7 @@ private:
92 u64 layer_id; 98 u64 layer_id;
93 99
94 std::vector<Buffer> queue; 100 std::vector<Buffer> queue;
101 std::list<u32> queue_sequence;
95 Kernel::EventPair buffer_wait_event; 102 Kernel::EventPair buffer_wait_event;
96}; 103};
97 104
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 3c5c53e24..f9db79370 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -37,15 +37,14 @@ NVFlinger::NVFlinger(Core::Timing::CoreTiming& core_timing) : core_timing{core_t
37 displays.emplace_back(4, "Null"); 37 displays.emplace_back(4, "Null");
38 38
39 // Schedule the screen composition events 39 // Schedule the screen composition events
40 const auto ticks = Settings::values.force_30fps_mode ? frame_ticks_30fps : frame_ticks; 40 composition_event = core_timing.RegisterEvent("ScreenComposition", [this](u64 userdata,
41 41 s64 cycles_late) {
42 composition_event = core_timing.RegisterEvent( 42 Compose();
43 "ScreenComposition", [this, ticks](u64 userdata, s64 cycles_late) { 43 const auto ticks = Settings::values.force_30fps_mode ? frame_ticks_30fps : GetNextTicks();
44 Compose(); 44 this->core_timing.ScheduleEvent(std::max<s64>(0LL, ticks - cycles_late), composition_event);
45 this->core_timing.ScheduleEvent(ticks - cycles_late, composition_event); 45 });
46 }); 46
47 47 core_timing.ScheduleEvent(frame_ticks, composition_event);
48 core_timing.ScheduleEvent(ticks, composition_event);
49} 48}
50 49
51NVFlinger::~NVFlinger() { 50NVFlinger::~NVFlinger() {
@@ -206,8 +205,14 @@ void NVFlinger::Compose() {
206 igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, 205 igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
207 buffer->get().transform, buffer->get().crop_rect); 206 buffer->get().transform, buffer->get().crop_rect);
208 207
208 swap_interval = buffer->get().swap_interval;
209 buffer_queue.ReleaseBuffer(buffer->get().slot); 209 buffer_queue.ReleaseBuffer(buffer->get().slot);
210 } 210 }
211} 211}
212 212
213s64 NVFlinger::GetNextTicks() const {
214 constexpr s64 max_hertz = 120LL;
215 return (Core::Timing::BASE_CLOCK_RATE * (1LL << swap_interval)) / max_hertz;
216}
217
213} // namespace Service::NVFlinger 218} // namespace Service::NVFlinger
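GetNextTicks derives the composition period from the swap interval of the last queued buffer: with max_hertz fixed at 120, a swap interval of 0, 1, or 2 paces composition at 120, 60, or 30 Hz. A worked check of the arithmetic (the clock rate below is illustrative; yuzu uses Core::Timing::BASE_CLOCK_RATE):

    #include <cstdint>
    #include <iostream>

    constexpr int64_t kBaseClockRate = 1'000'000'000; // illustrative ticks/second

    int64_t GetNextTicks(uint32_t swap_interval) {
        constexpr int64_t max_hertz = 120;
        return (kBaseClockRate * (int64_t{1} << swap_interval)) / max_hertz;
    }

    int main() {
        // interval 0 -> 120 Hz, 1 -> 60 Hz, 2 -> 30 Hz frame pacing
        std::cout << kBaseClockRate / GetNextTicks(1) << " Hz\n"; // prints 60 Hz
    }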
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index c0a83fffb..988be8726 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -74,6 +74,8 @@ public:
74 /// finished. 74 /// finished.
75 void Compose(); 75 void Compose();
76 76
77 s64 GetNextTicks() const;
78
77private: 79private:
78 /// Finds the display identified by the specified ID. 80 /// Finds the display identified by the specified ID.
79 VI::Display* FindDisplay(u64 display_id); 81 VI::Display* FindDisplay(u64 display_id);
@@ -98,6 +100,8 @@ private:
98 /// layers. 100 /// layers.
99 u32 next_buffer_queue_id = 1; 101 u32 next_buffer_queue_id = 1;
100 102
103 u32 swap_interval = 1;
104
101 /// Event that handles screen composition. 105 /// Event that handles screen composition.
102 Core::Timing::EventType* composition_event; 106 Core::Timing::EventType* composition_event;
103 107
diff --git a/src/core/hle/service/pm/pm.cpp b/src/core/hle/service/pm/pm.cpp
index ebcc41a43..fe6b5f798 100644
--- a/src/core/hle/service/pm/pm.cpp
+++ b/src/core/hle/service/pm/pm.cpp
@@ -3,11 +3,44 @@
 // Refer to the license.txt file included.
 
 #include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/process.h"
 #include "core/hle/service/pm/pm.h"
 #include "core/hle/service/service.h"
 
 namespace Service::PM {
 
+namespace {
+
+constexpr ResultCode ERROR_PROCESS_NOT_FOUND{ErrorModule::PM, 1};
+
+constexpr u64 NO_PROCESS_FOUND_PID{0};
+
+std::optional<Kernel::SharedPtr<Kernel::Process>> SearchProcessList(
+    const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list,
+    std::function<bool(const Kernel::SharedPtr<Kernel::Process>&)> predicate) {
+    const auto iter = std::find_if(process_list.begin(), process_list.end(), predicate);
+
+    if (iter == process_list.end()) {
+        return std::nullopt;
+    }
+
+    return *iter;
+}
+
+void GetApplicationPidGeneric(Kernel::HLERequestContext& ctx,
+                              const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list) {
+    const auto process = SearchProcessList(process_list, [](const auto& process) {
+        return process->GetProcessID() == Kernel::Process::ProcessIDMin;
+    });
+
+    IPC::ResponseBuilder rb{ctx, 4};
+    rb.Push(RESULT_SUCCESS);
+    rb.Push(process.has_value() ? (*process)->GetProcessID() : NO_PROCESS_FOUND_PID);
+}
+
+} // Anonymous namespace
+
 class BootMode final : public ServiceFramework<BootMode> {
 public:
     explicit BootMode() : ServiceFramework{"pm:bm"} {
@@ -41,14 +74,15 @@ private:
 
 class DebugMonitor final : public ServiceFramework<DebugMonitor> {
 public:
-    explicit DebugMonitor() : ServiceFramework{"pm:dmnt"} {
+    explicit DebugMonitor(const Kernel::KernelCore& kernel)
+        : ServiceFramework{"pm:dmnt"}, kernel(kernel) {
         // clang-format off
         static const FunctionInfo functions[] = {
             {0, nullptr, "GetDebugProcesses"},
             {1, nullptr, "StartDebugProcess"},
-            {2, nullptr, "GetTitlePid"},
+            {2, &DebugMonitor::GetTitlePid, "GetTitlePid"},
             {3, nullptr, "EnableDebugForTitleId"},
-            {4, nullptr, "GetApplicationPid"},
+            {4, &DebugMonitor::GetApplicationPid, "GetApplicationPid"},
             {5, nullptr, "EnableDebugForApplication"},
             {6, nullptr, "DisableDebug"},
         };
@@ -56,21 +90,77 @@ public:
 
         RegisterHandlers(functions);
     }
+
+private:
+    void GetTitlePid(Kernel::HLERequestContext& ctx) {
+        IPC::RequestParser rp{ctx};
+        const auto title_id = rp.PopRaw<u64>();
+
+        LOG_DEBUG(Service_PM, "called, title_id={:016X}", title_id);
+
+        const auto process =
+            SearchProcessList(kernel.GetProcessList(), [title_id](const auto& process) {
+                return process->GetTitleID() == title_id;
+            });
+
+        if (!process.has_value()) {
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERROR_PROCESS_NOT_FOUND);
+            return;
+        }
+
+        IPC::ResponseBuilder rb{ctx, 4};
+        rb.Push(RESULT_SUCCESS);
+        rb.Push((*process)->GetProcessID());
+    }
+
+    void GetApplicationPid(Kernel::HLERequestContext& ctx) {
+        LOG_DEBUG(Service_PM, "called");
+        GetApplicationPidGeneric(ctx, kernel.GetProcessList());
+    }
+
+    const Kernel::KernelCore& kernel;
 };
 
 class Info final : public ServiceFramework<Info> {
 public:
-    explicit Info() : ServiceFramework{"pm:info"} {
+    explicit Info(const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list)
+        : ServiceFramework{"pm:info"}, process_list(process_list) {
         static const FunctionInfo functions[] = {
-            {0, nullptr, "GetTitleId"},
+            {0, &Info::GetTitleId, "GetTitleId"},
         };
         RegisterHandlers(functions);
     }
+
+private:
+    void GetTitleId(Kernel::HLERequestContext& ctx) {
+        IPC::RequestParser rp{ctx};
+        const auto process_id = rp.PopRaw<u64>();
+
+        LOG_DEBUG(Service_PM, "called, process_id={:016X}", process_id);
+
+        const auto process = SearchProcessList(process_list, [process_id](const auto& process) {
+            return process->GetProcessID() == process_id;
+        });
+
+        if (!process.has_value()) {
+            IPC::ResponseBuilder rb{ctx, 2};
+            rb.Push(ERROR_PROCESS_NOT_FOUND);
+            return;
+        }
+
+        IPC::ResponseBuilder rb{ctx, 4};
+        rb.Push(RESULT_SUCCESS);
+        rb.Push((*process)->GetTitleID());
+    }
+
+    const std::vector<Kernel::SharedPtr<Kernel::Process>>& process_list;
 };
 
 class Shell final : public ServiceFramework<Shell> {
 public:
-    explicit Shell() : ServiceFramework{"pm:shell"} {
+    explicit Shell(const Kernel::KernelCore& kernel)
+        : ServiceFramework{"pm:shell"}, kernel(kernel) {
         // clang-format off
         static const FunctionInfo functions[] = {
             {0, nullptr, "LaunchProcess"},
@@ -79,21 +169,31 @@ public:
             {3, nullptr, "GetProcessEventWaiter"},
             {4, nullptr, "GetProcessEventType"},
             {5, nullptr, "NotifyBootFinished"},
-            {6, nullptr, "GetApplicationPid"},
+            {6, &Shell::GetApplicationPid, "GetApplicationPid"},
             {7, nullptr, "BoostSystemMemoryResourceLimit"},
             {8, nullptr, "EnableAdditionalSystemThreads"},
+            {9, nullptr, "GetUnimplementedEventHandle"},
         };
         // clang-format on
 
         RegisterHandlers(functions);
     }
+
+private:
+    void GetApplicationPid(Kernel::HLERequestContext& ctx) {
+        LOG_DEBUG(Service_PM, "called");
+        GetApplicationPidGeneric(ctx, kernel.GetProcessList());
+    }
+
+    const Kernel::KernelCore& kernel;
 };
 
-void InstallInterfaces(SM::ServiceManager& sm) {
-    std::make_shared<BootMode>()->InstallAsService(sm);
-    std::make_shared<DebugMonitor>()->InstallAsService(sm);
-    std::make_shared<Info>()->InstallAsService(sm);
-    std::make_shared<Shell>()->InstallAsService(sm);
+void InstallInterfaces(Core::System& system) {
+    std::make_shared<BootMode>()->InstallAsService(system.ServiceManager());
+    std::make_shared<DebugMonitor>(system.Kernel())->InstallAsService(system.ServiceManager());
+    std::make_shared<Info>(system.Kernel().GetProcessList())
+        ->InstallAsService(system.ServiceManager());
+    std::make_shared<Shell>(system.Kernel())->InstallAsService(system.ServiceManager());
 }
 
 } // namespace Service::PM
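For reference, a minimal self-contained sketch of the predicate-driven lookup the new pm handlers share; Process and ProcessPtr are stand-ins for the kernel types, not the diff's code:

#include <algorithm>
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <vector>

struct Process { // stand-in for Kernel::Process
    std::uint64_t process_id;
    std::uint64_t title_id;
};
using ProcessPtr = std::shared_ptr<Process>; // stand-in for Kernel::SharedPtr

std::optional<ProcessPtr> SearchProcessList(const std::vector<ProcessPtr>& list,
                                            const std::function<bool(const ProcessPtr&)>& pred) {
    const auto it = std::find_if(list.begin(), list.end(), pred);
    return it == list.end() ? std::nullopt : std::optional{*it};
}

// A GetTitlePid-style query then reduces to one line plus the not-found branch:
//   const auto proc = SearchProcessList(list, [tid](const auto& p) { return p->title_id == tid; });
//   if (!proc) { /* push ERROR_PROCESS_NOT_FOUND */ }

Centralizing the search lets GetTitlePid, GetTitleId, and both GetApplicationPid handlers differ only in their predicate and in which field of the found process they push back.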
diff --git a/src/core/hle/service/pm/pm.h b/src/core/hle/service/pm/pm.h
index cc8d3f215..852e7050c 100644
--- a/src/core/hle/service/pm/pm.h
+++ b/src/core/hle/service/pm/pm.h
@@ -4,8 +4,8 @@
 
 #pragma once
 
-namespace Service::SM {
-class ServiceManager;
+namespace Core {
+class System;
 }
 
 namespace Service::PM {
@@ -16,6 +16,6 @@ enum class SystemBootMode {
 };
 
 /// Registers all PM services with the specified service manager.
-void InstallInterfaces(SM::ServiceManager& service_manager);
+void InstallInterfaces(Core::System& system);
 
 } // namespace Service::PM
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index 952c03e27..3a0f8c3f6 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -206,7 +206,7 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system) {
     AM::InstallInterfaces(*sm, nv_flinger, system);
     AOC::InstallInterfaces(*sm);
     APM::InstallInterfaces(system);
-    Audio::InstallInterfaces(*sm);
+    Audio::InstallInterfaces(*sm, system);
     BCAT::InstallInterfaces(*sm);
     BPC::InstallInterfaces(*sm);
     BtDrv::InstallInterfaces(*sm);
@@ -236,12 +236,12 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm, Core::System& system) {
     NIM::InstallInterfaces(*sm);
     NPNS::InstallInterfaces(*sm);
     NS::InstallInterfaces(*sm);
-    Nvidia::InstallInterfaces(*sm, *nv_flinger);
+    Nvidia::InstallInterfaces(*sm, *nv_flinger, system);
     PCIe::InstallInterfaces(*sm);
     PCTL::InstallInterfaces(*sm);
     PCV::InstallInterfaces(*sm);
     PlayReport::InstallInterfaces(*sm);
-    PM::InstallInterfaces(*sm);
+    PM::InstallInterfaces(system);
     PSC::InstallInterfaces(*sm);
     PSM::InstallInterfaces(*sm);
     Set::InstallInterfaces(*sm);
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index f1fa6ccd1..199b30635 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -21,6 +21,7 @@
 #include "core/hle/kernel/readable_event.h"
 #include "core/hle/kernel/thread.h"
 #include "core/hle/kernel/writable_event.h"
+#include "core/hle/service/nvdrv/nvdata.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
 #include "core/hle/service/nvflinger/nvflinger.h"
@@ -328,32 +329,22 @@ public:
     Data data;
 };
 
-struct BufferProducerFence {
-    u32 is_valid;
-    std::array<Nvidia::IoctlFence, 4> fences;
-};
-static_assert(sizeof(BufferProducerFence) == 36, "BufferProducerFence has wrong size");
-
 class IGBPDequeueBufferResponseParcel : public Parcel {
 public:
-    explicit IGBPDequeueBufferResponseParcel(u32 slot) : slot(slot) {}
+    explicit IGBPDequeueBufferResponseParcel(u32 slot, Service::Nvidia::MultiFence& multi_fence)
+        : slot(slot), multi_fence(multi_fence) {}
     ~IGBPDequeueBufferResponseParcel() override = default;
 
 protected:
     void SerializeData() override {
-        // TODO(Subv): Find out how this Fence is used.
-        BufferProducerFence fence = {};
-        fence.is_valid = 1;
-        for (auto& fence_ : fence.fences)
-            fence_.id = -1;
-
         Write(slot);
         Write<u32_le>(1);
-        WriteObject(fence);
+        WriteObject(multi_fence);
         Write<u32_le>(0);
     }
 
     u32_le slot;
+    Service::Nvidia::MultiFence multi_fence;
 };
 
 class IGBPRequestBufferRequestParcel : public Parcel {
@@ -400,12 +391,6 @@ public:
         data = Read<Data>();
     }
 
-    struct Fence {
-        u32_le id;
-        u32_le value;
-    };
-    static_assert(sizeof(Fence) == 8, "Fence has wrong size");
-
     struct Data {
         u32_le slot;
         INSERT_PADDING_WORDS(3);
@@ -418,15 +403,15 @@ public:
         s32_le scaling_mode;
         NVFlinger::BufferQueue::BufferTransformFlags transform;
         u32_le sticky_transform;
-        INSERT_PADDING_WORDS(2);
-        u32_le fence_is_valid;
-        std::array<Fence, 2> fences;
+        INSERT_PADDING_WORDS(1);
+        u32_le swap_interval;
+        Service::Nvidia::MultiFence multi_fence;
 
         Common::Rectangle<int> GetCropRect() const {
             return {crop_left, crop_top, crop_right, crop_bottom};
         }
     };
-    static_assert(sizeof(Data) == 80, "ParcelData has wrong size");
+    static_assert(sizeof(Data) == 96, "ParcelData has wrong size");
 
     Data data;
 };
@@ -547,11 +532,11 @@ private:
         IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
         const u32 width{request.data.width};
         const u32 height{request.data.height};
-        std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
+        auto result = buffer_queue.DequeueBuffer(width, height);
 
-        if (slot) {
+        if (result) {
             // Buffer is available
-            IGBPDequeueBufferResponseParcel response{*slot};
+            IGBPDequeueBufferResponseParcel response{result->first, *result->second};
             ctx.WriteBuffer(response.Serialize());
         } else {
             // Wait the current thread until a buffer becomes available
@@ -561,10 +546,10 @@ private:
                 Kernel::ThreadWakeupReason reason) {
                 // Repeat TransactParcel DequeueBuffer when a buffer is available
                 auto& buffer_queue = nv_flinger->FindBufferQueue(id);
-                std::optional<u32> slot = buffer_queue.DequeueBuffer(width, height);
-                ASSERT_MSG(slot != std::nullopt, "Could not dequeue buffer.");
+                auto result = buffer_queue.DequeueBuffer(width, height);
+                ASSERT_MSG(result != std::nullopt, "Could not dequeue buffer.");
 
-                IGBPDequeueBufferResponseParcel response{*slot};
+                IGBPDequeueBufferResponseParcel response{result->first, *result->second};
                 ctx.WriteBuffer(response.Serialize());
                 IPC::ResponseBuilder rb{ctx, 2};
                 rb.Push(RESULT_SUCCESS);
@@ -582,7 +567,8 @@ private:
         IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};
 
         buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
                                  request.data.GetCropRect(), request.data.swap_interval,
                                  request.data.multi_fence);
 
         IGBPQueueBufferResponseParcel response{1280, 720};
         ctx.WriteBuffer(response.Serialize());
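A quick size check of the reworked IGBPQueueBufferRequestParcel::Data. The MultiFence layout below is an assumption inferred from how nvdata.h is used in this change (a fence count plus four {id, value} pairs); only the arithmetic is being demonstrated:

#include <array>
#include <cstdint>

struct Fence {
    std::uint32_t id;
    std::uint32_t value;
};
struct MultiFence { // assumed Service::Nvidia::MultiFence layout
    std::uint32_t num_fences;
    std::array<Fence, 4> fences;
};
static_assert(sizeof(MultiFence) == 36, "matches the 36-byte fence blob the parcels serialize");

// Old Data tail: 2 padding words + fence_is_valid + Fence[2]  = 8 + 4 + 16 = 28 bytes
// New Data tail: 1 padding word  + swap_interval + MultiFence = 4 + 4 + 36 = 44 bytes
// Net growth: +16 bytes, which is exactly the 80 -> 96 change in the Data static_assert.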
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index 6d4b02375..f1795fdd6 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -295,7 +295,7 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) {
         }
     }
 
-    std::vector<u8> program_image(total_image_size);
+    Kernel::PhysicalMemory program_image(total_image_size);
     std::size_t current_image_position = 0;
 
     Kernel::CodeSet codeset;
diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp
index 70051c13a..474b55cb1 100644
--- a/src/core/loader/kip.cpp
+++ b/src/core/loader/kip.cpp
@@ -69,7 +69,7 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) {
 
     const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
     Kernel::CodeSet codeset;
-    std::vector<u8> program_image;
+    Kernel::PhysicalMemory program_image;
 
     const auto load_segment = [&program_image](Kernel::CodeSet::Segment& segment,
                                                const std::vector<u8>& data, u32 offset) {
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index 6a0ca389b..e92e2e06e 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -143,7 +143,7 @@ static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data,
     }
 
     // Build program image
-    std::vector<u8> program_image(PageAlignSize(nro_header.file_size));
+    Kernel::PhysicalMemory program_image(PageAlignSize(nro_header.file_size));
     std::memcpy(program_image.data(), data.data(), program_image.size());
     if (program_image.size() != PageAlignSize(nro_header.file_size)) {
         return {};
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 29311404a..70c90109f 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -89,7 +89,7 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
 
     // Build program image
     Kernel::CodeSet codeset;
-    std::vector<u8> program_image;
+    Kernel::PhysicalMemory program_image;
     for (std::size_t i = 0; i < nso_header.segments.size(); ++i) {
         std::vector<u8> data =
             file.ReadBytes(nso_header.segments_compressed_size[i], nso_header.segments[i].offset);
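Context for the std::vector<u8> to Kernel::PhysicalMemory swaps across the four loaders above: judging by the new kernel/physical_memory.h in this change set, PhysicalMemory appears to keep the vector interface but back it with an alignment-guaranteeing allocator, so the loaders' .data()/.size()/resize() usage is unchanged. A hypothetical minimal equivalent, purely illustrative and not the project's actual definition:

#include <cstddef>
#include <cstdint>
#include <new>
#include <vector>

template <typename T, std::size_t Align>
struct AlignmentAllocator {
    using value_type = T;

    AlignmentAllocator() = default;
    template <typename U>
    AlignmentAllocator(const AlignmentAllocator<U, Align>&) noexcept {}

    T* allocate(std::size_t n) {
        return static_cast<T*>(::operator new(n * sizeof(T), std::align_val_t{Align}));
    }
    void deallocate(T* p, std::size_t) noexcept {
        ::operator delete(p, std::align_val_t{Align});
    }
};

template <typename T, typename U, std::size_t Align>
bool operator==(const AlignmentAllocator<T, Align>&, const AlignmentAllocator<U, Align>&) noexcept {
    return true;
}
template <typename T, typename U, std::size_t Align>
bool operator!=(const AlignmentAllocator<T, Align>&, const AlignmentAllocator<U, Align>&) noexcept {
    return false;
}

// Hypothetical stand-in for what the loaders now construct:
using PhysicalMemory = std::vector<std::uint8_t, AlignmentAllocator<std::uint8_t, 256>>;
// e.g. PhysicalMemory program_image(total_image_size); // drop-in for std::vector<u8>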
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 63aa59690..0dd1632ac 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -85,7 +85,6 @@ void LogSettings() {
     LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0));
     LogSetting("System_CurrentUser", Settings::values.current_user);
     LogSetting("System_LanguageIndex", Settings::values.language_index);
-    LogSetting("Core_CpuJitEnabled", Settings::values.cpu_jit_enabled);
     LogSetting("Core_UseMultiCore", Settings::values.use_multi_core);
     LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
     LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
diff --git a/src/core/settings.h b/src/core/settings.h
index acf18d653..6638ce8f9 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -378,7 +378,6 @@ struct Values {
     std::atomic_bool is_device_reload_pending{true};
 
     // Core
-    bool cpu_jit_enabled;
     bool use_multi_core;
 
     // Data Storage
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 98f49042a..793d102d3 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -168,7 +168,6 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
     AddField(Telemetry::FieldType::UserConfig, "Audio_SinkId", Settings::values.sink_id);
     AddField(Telemetry::FieldType::UserConfig, "Audio_EnableAudioStretching",
              Settings::values.enable_audio_stretching);
-    AddField(Telemetry::FieldType::UserConfig, "Core_UseCpuJit", Settings::values.cpu_jit_enabled);
     AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore",
              Settings::values.use_multi_core);
     AddField(Telemetry::FieldType::UserConfig, "Renderer_ResolutionFactor",
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6839abe71..e2f85c5f1 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,4 +1,7 @@
 add_library(video_core STATIC
+    buffer_cache/buffer_block.h
+    buffer_cache/buffer_cache.h
+    buffer_cache/map_interval.h
     dma_pusher.cpp
     dma_pusher.h
     debug_utils/debug_utils.cpp
@@ -43,8 +46,6 @@ add_library(video_core STATIC
     renderer_opengl/gl_device.h
     renderer_opengl/gl_framebuffer_cache.cpp
     renderer_opengl/gl_framebuffer_cache.h
-    renderer_opengl/gl_global_cache.cpp
-    renderer_opengl/gl_global_cache.h
     renderer_opengl/gl_rasterizer.cpp
     renderer_opengl/gl_rasterizer.h
     renderer_opengl/gl_resource_manager.cpp
@@ -101,8 +102,11 @@ add_library(video_core STATIC
     shader/decode/integer_set.cpp
     shader/decode/half_set.cpp
     shader/decode/video.cpp
+    shader/decode/warp.cpp
     shader/decode/xmad.cpp
     shader/decode/other.cpp
+    shader/control_flow.cpp
+    shader/control_flow.h
     shader/decode.cpp
     shader/node_helper.cpp
     shader/node_helper.h
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
new file mode 100644
index 000000000..4b9193182
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -0,0 +1,76 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <unordered_set>
+#include <utility>
+
+#include "common/alignment.h"
+#include "common/common_types.h"
+#include "video_core/gpu.h"
+
+namespace VideoCommon {
+
+class BufferBlock {
+public:
+    bool Overlaps(const CacheAddr start, const CacheAddr end) const {
+        return (cache_addr < end) && (cache_addr_end > start);
+    }
+
+    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
+        return cache_addr <= other_start && other_end <= cache_addr_end;
+    }
+
+    u8* GetWritableHostPtr() const {
+        return FromCacheAddr(cache_addr);
+    }
+
+    u8* GetWritableHostPtr(std::size_t offset) const {
+        return FromCacheAddr(cache_addr + offset);
+    }
+
+    std::size_t GetOffset(const CacheAddr in_addr) {
+        return static_cast<std::size_t>(in_addr - cache_addr);
+    }
+
+    CacheAddr GetCacheAddr() const {
+        return cache_addr;
+    }
+
+    CacheAddr GetCacheAddrEnd() const {
+        return cache_addr_end;
+    }
+
+    void SetCacheAddr(const CacheAddr new_addr) {
+        cache_addr = new_addr;
+        cache_addr_end = new_addr + size;
+    }
+
+    std::size_t GetSize() const {
+        return size;
+    }
+
+    void SetEpoch(u64 new_epoch) {
+        epoch = new_epoch;
+    }
+
+    u64 GetEpoch() {
+        return epoch;
+    }
+
+protected:
+    explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} {
+        SetCacheAddr(cache_addr);
+    }
+    ~BufferBlock() = default;
+
+private:
+    CacheAddr cache_addr{};
+    CacheAddr cache_addr_end{};
+    std::size_t size{};
+    u64 epoch{};
+};
+
+} // namespace VideoCommon
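BufferBlock treats [cache_addr, cache_addr_end) as a half-open interval, so adjacent blocks that merely touch end-to-start do not count as overlapping. A trimmed-down sketch of the same predicates with compile-time checks (illustrative names, not the diff's code):

#include <cstdint>

using CacheAddr = std::uintptr_t; // assumption: an unsigned host-address type

struct IntervalCheck { // reduced BufferBlock predicates
    CacheAddr begin, end; // half-open [begin, end)

    constexpr bool Overlaps(CacheAddr s, CacheAddr e) const {
        return begin < e && end > s;
    }
    constexpr bool IsInside(CacheAddr s, CacheAddr e) const {
        return begin <= s && e <= end;
    }
};

constexpr IntervalCheck block{0x1000, 0x2000};
static_assert(!block.Overlaps(0x0800, 0x1000)); // touching end-to-start: no overlap
static_assert(block.Overlaps(0x1FFF, 0x2100));  // shares one byte
static_assert(block.IsInside(0x1200, 0x1800));  // range fully contained in the block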
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
new file mode 100644
index 000000000..2442ddfd6
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -0,0 +1,447 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "common/alignment.h"
+#include "common/common_types.h"
+#include "core/core.h"
+#include "video_core/buffer_cache/buffer_block.h"
+#include "video_core/buffer_cache/map_interval.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace VideoCommon {
+
+using MapInterval = std::shared_ptr<MapIntervalBase>;
+
+template <typename TBuffer, typename TBufferType, typename StreamBuffer>
+class BufferCache {
+public:
+    using BufferInfo = std::pair<const TBufferType*, u64>;
+
+    BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
+                            bool is_written = false) {
+        std::lock_guard lock{mutex};
+
+        auto& memory_manager = system.GPU().MemoryManager();
+        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+        if (!host_ptr) {
+            return {GetEmptyBuffer(size), 0};
+        }
+        const auto cache_addr = ToCacheAddr(host_ptr);
+
+        // Cache management is a big overhead, so only cache entries with a given size.
+        // TODO: Figure out which size is the best for given games.
+        constexpr std::size_t max_stream_size = 0x800;
+        if (size < max_stream_size) {
+            if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
+                return StreamBufferUpload(host_ptr, size, alignment);
+            }
+        }
+
+        auto block = GetBlock(cache_addr, size);
+        auto map = MapAddress(block, gpu_addr, cache_addr, size);
+        if (is_written) {
+            map->MarkAsModified(true, GetModifiedTicks());
+            if (!map->IsWritten()) {
+                map->MarkAsWritten(true);
+                MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
+            }
+        } else {
+            if (map->IsWritten()) {
+                WriteBarrier();
+            }
+        }
+
+        const u64 offset = static_cast<u64>(block->GetOffset(cache_addr));
+
+        return {ToHandle(block), offset};
+    }
+
+    /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
+    BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
+                                std::size_t alignment = 4) {
+        std::lock_guard lock{mutex};
+        return StreamBufferUpload(raw_pointer, size, alignment);
+    }
+
+    void Map(std::size_t max_size) {
+        std::lock_guard lock{mutex};
+
+        std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
+        buffer_offset = buffer_offset_base;
+    }
+
+    /// Finishes the upload stream, returns true on bindings invalidation.
+    bool Unmap() {
+        std::lock_guard lock{mutex};
+
+        stream_buffer->Unmap(buffer_offset - buffer_offset_base);
+        return std::exchange(invalidated, false);
+    }
+
+    void TickFrame() {
+        ++epoch;
+        while (!pending_destruction.empty()) {
+            if (pending_destruction.front()->GetEpoch() + 1 > epoch) {
+                break;
+            }
+            pending_destruction.pop_front();
+        }
+    }
+
+    /// Write any cached resources overlapping the specified region back to memory
+    void FlushRegion(CacheAddr addr, std::size_t size) {
+        std::lock_guard lock{mutex};
+
+        std::vector<MapInterval> objects = GetMapsInRange(addr, size);
+        std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) {
+            return a->GetModificationTick() < b->GetModificationTick();
+        });
+        for (auto& object : objects) {
+            if (object->IsModified() && object->IsRegistered()) {
+                FlushMap(object);
+            }
+        }
+    }
+
+    /// Mark the specified region as being invalidated
+    void InvalidateRegion(CacheAddr addr, u64 size) {
+        std::lock_guard lock{mutex};
+
+        std::vector<MapInterval> objects = GetMapsInRange(addr, size);
+        for (auto& object : objects) {
+            if (object->IsRegistered()) {
+                Unregister(object);
+            }
+        }
+    }
+
+    virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0;
+
+protected:
+    explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+                         std::unique_ptr<StreamBuffer> stream_buffer)
+        : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)},
+          stream_buffer_handle{this->stream_buffer->GetHandle()} {}
+
+    ~BufferCache() = default;
+
+    virtual const TBufferType* ToHandle(const TBuffer& storage) = 0;
+
+    virtual void WriteBarrier() = 0;
+
+    virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0;
+
+    virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
+                                 const u8* data) = 0;
+
+    virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
+                                   u8* data) = 0;
+
+    virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset,
+                           std::size_t dst_offset, std::size_t size) = 0;
+
+    /// Register an object into the cache
+    void Register(const MapInterval& new_map, bool inherit_written = false) {
+        const CacheAddr cache_ptr = new_map->GetStart();
+        const std::optional<VAddr> cpu_addr =
+            system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress());
+        if (!cache_ptr || !cpu_addr) {
+            LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
+                         new_map->GetGpuAddress());
+            return;
+        }
+        const std::size_t size = new_map->GetEnd() - new_map->GetStart();
+        new_map->SetCpuAddress(*cpu_addr);
+        new_map->MarkAsRegistered(true);
+        const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
+        mapped_addresses.insert({interval, new_map});
+        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
+        if (inherit_written) {
+            MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
+            new_map->MarkAsWritten(true);
+        }
+    }
+
+    /// Unregisters an object from the cache
+    void Unregister(MapInterval& map) {
+        const std::size_t size = map->GetEnd() - map->GetStart();
+        rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1);
+        map->MarkAsRegistered(false);
+        if (map->IsWritten()) {
+            UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
+        }
+        const IntervalType delete_interval{map->GetStart(), map->GetEnd()};
+        mapped_addresses.erase(delete_interval);
+    }
+
+private:
+    MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) {
+        return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
+    }
+
+    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr,
+                           const CacheAddr cache_addr, const std::size_t size) {
+
+        std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size);
+        if (overlaps.empty()) {
+            const CacheAddr cache_addr_end = cache_addr + size;
+            MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr);
+            u8* host_ptr = FromCacheAddr(cache_addr);
+            UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr);
+            Register(new_map);
+            return new_map;
+        }
+
+        const CacheAddr cache_addr_end = cache_addr + size;
+        if (overlaps.size() == 1) {
+            MapInterval& current_map = overlaps[0];
+            if (current_map->IsInside(cache_addr, cache_addr_end)) {
+                return current_map;
+            }
+        }
+        CacheAddr new_start = cache_addr;
+        CacheAddr new_end = cache_addr_end;
+        bool write_inheritance = false;
+        bool modified_inheritance = false;
+        // Calculate new buffer parameters
+        for (auto& overlap : overlaps) {
+            new_start = std::min(overlap->GetStart(), new_start);
+            new_end = std::max(overlap->GetEnd(), new_end);
+            write_inheritance |= overlap->IsWritten();
+            modified_inheritance |= overlap->IsModified();
+        }
+        GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr;
+        for (auto& overlap : overlaps) {
+            Unregister(overlap);
+        }
+        UpdateBlock(block, new_start, new_end, overlaps);
+        MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
+        if (modified_inheritance) {
+            new_map->MarkAsModified(true, GetModifiedTicks());
+        }
+        Register(new_map, write_inheritance);
+        return new_map;
+    }
+
+    void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end,
+                     std::vector<MapInterval>& overlaps) {
+        const IntervalType base_interval{start, end};
+        IntervalSet interval_set{};
+        interval_set.add(base_interval);
+        for (auto& overlap : overlaps) {
+            const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()};
+            interval_set.subtract(subtract);
+        }
+        for (auto& interval : interval_set) {
+            std::size_t size = interval.upper() - interval.lower();
+            if (size > 0) {
+                u8* host_ptr = FromCacheAddr(interval.lower());
+                UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr);
+            }
+        }
+    }
+
+    std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) {
+        if (size == 0) {
+            return {};
+        }
+
+        std::vector<MapInterval> objects{};
+        const IntervalType interval{addr, addr + size};
+        for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) {
+            objects.push_back(pair.second);
+        }
+
+        return objects;
+    }
+
+    /// Returns a ticks counter used for tracking when cached objects were last modified
+    u64 GetModifiedTicks() {
+        return ++modified_ticks;
+    }
+
+    void FlushMap(MapInterval map) {
+        std::size_t size = map->GetEnd() - map->GetStart();
+        TBuffer block = blocks[map->GetStart() >> block_page_bits];
+        u8* host_ptr = FromCacheAddr(map->GetStart());
+        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr);
+        map->MarkAsModified(false, 0);
+    }
+
+    BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
+                                  std::size_t alignment) {
+        AlignBuffer(alignment);
+        const std::size_t uploaded_offset = buffer_offset;
+        std::memcpy(buffer_ptr, raw_pointer, size);
+
+        buffer_ptr += size;
+        buffer_offset += size;
+        return {&stream_buffer_handle, uploaded_offset};
+    }
+
+    void AlignBuffer(std::size_t alignment) {
+        // Align the offset, not the mapped pointer
+        const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment);
+        buffer_ptr += offset_aligned - buffer_offset;
+        buffer_offset = offset_aligned;
+    }
+
+    TBuffer EnlargeBlock(TBuffer buffer) {
+        const std::size_t old_size = buffer->GetSize();
+        const std::size_t new_size = old_size + block_page_size;
+        const CacheAddr cache_addr = buffer->GetCacheAddr();
+        TBuffer new_buffer = CreateBlock(cache_addr, new_size);
+        CopyBlock(buffer, new_buffer, 0, 0, old_size);
+        buffer->SetEpoch(epoch);
+        pending_destruction.push_back(buffer);
+        const CacheAddr cache_addr_end = cache_addr + new_size - 1;
+        u64 page_start = cache_addr >> block_page_bits;
+        const u64 page_end = cache_addr_end >> block_page_bits;
+        while (page_start <= page_end) {
+            blocks[page_start] = new_buffer;
+            ++page_start;
+        }
+        return new_buffer;
+    }
+
+    TBuffer MergeBlocks(TBuffer first, TBuffer second) {
+        const std::size_t size_1 = first->GetSize();
+        const std::size_t size_2 = second->GetSize();
+        const CacheAddr first_addr = first->GetCacheAddr();
+        const CacheAddr second_addr = second->GetCacheAddr();
+        const CacheAddr new_addr = std::min(first_addr, second_addr);
+        const std::size_t new_size = size_1 + size_2;
+        TBuffer new_buffer = CreateBlock(new_addr, new_size);
+        CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
+        CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2);
+        first->SetEpoch(epoch);
+        second->SetEpoch(epoch);
+        pending_destruction.push_back(first);
+        pending_destruction.push_back(second);
+        const CacheAddr cache_addr_end = new_addr + new_size - 1;
+        u64 page_start = new_addr >> block_page_bits;
+        const u64 page_end = cache_addr_end >> block_page_bits;
+        while (page_start <= page_end) {
+            blocks[page_start] = new_buffer;
+            ++page_start;
+        }
+        return new_buffer;
+    }
+
+    TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) {
+        TBuffer found{};
+        const CacheAddr cache_addr_end = cache_addr + size - 1;
+        u64 page_start = cache_addr >> block_page_bits;
+        const u64 page_end = cache_addr_end >> block_page_bits;
+        while (page_start <= page_end) {
+            auto it = blocks.find(page_start);
+            if (it == blocks.end()) {
+                if (found) {
+                    found = EnlargeBlock(found);
+                } else {
+                    const CacheAddr start_addr = (page_start << block_page_bits);
+                    found = CreateBlock(start_addr, block_page_size);
+                    blocks[page_start] = found;
+                }
+            } else {
+                if (found) {
+                    if (found == it->second) {
+                        ++page_start;
+                        continue;
+                    }
+                    found = MergeBlocks(found, it->second);
+                } else {
+                    found = it->second;
+                }
+            }
+            ++page_start;
+        }
+        return found;
+    }
+
+    void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
+        u64 page_start = start >> write_page_bit;
+        const u64 page_end = end >> write_page_bit;
+        while (page_start <= page_end) {
+            auto it = written_pages.find(page_start);
+            if (it != written_pages.end()) {
+                it->second = it->second + 1;
+            } else {
+                written_pages[page_start] = 1;
+            }
+            page_start++;
+        }
+    }
+
+    void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
+        u64 page_start = start >> write_page_bit;
+        const u64 page_end = end >> write_page_bit;
+        while (page_start <= page_end) {
+            auto it = written_pages.find(page_start);
+            if (it != written_pages.end()) {
+                if (it->second > 1) {
+                    it->second = it->second - 1;
+                } else {
+                    written_pages.erase(it);
+                }
+            }
+            page_start++;
+        }
+    }
+
+    bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const {
+        u64 page_start = start >> write_page_bit;
+        const u64 page_end = end >> write_page_bit;
+        while (page_start <= page_end) {
+            if (written_pages.count(page_start) > 0) {
+                return true;
+            }
+            page_start++;
+        }
+        return false;
+    }
+
+    VideoCore::RasterizerInterface& rasterizer;
+    Core::System& system;
+    std::unique_ptr<StreamBuffer> stream_buffer;
+
+    TBufferType stream_buffer_handle{};
+
+    bool invalidated = false;
+
+    u8* buffer_ptr = nullptr;
+    u64 buffer_offset = 0;
+    u64 buffer_offset_base = 0;
+
+    using IntervalSet = boost::icl::interval_set<CacheAddr>;
+    using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>;
+    using IntervalType = typename IntervalCache::interval_type;
+    IntervalCache mapped_addresses{};
+
+    static constexpr u64 write_page_bit{11};
+    std::unordered_map<u64, u32> written_pages{};
+
+    static constexpr u64 block_page_bits{21};
+    static constexpr u64 block_page_size{1 << block_page_bits};
+    std::unordered_map<u64, TBuffer> blocks{};
+
+    std::list<TBuffer> pending_destruction{};
+    u64 epoch{};
+    u64 modified_ticks{};
+
+    std::recursive_mutex mutex;
+};
+
+} // namespace VideoCommon
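One detail worth isolating from BufferCache: blocks retired by EnlargeBlock/MergeBlocks are not freed immediately but parked in pending_destruction until TickFrame() has advanced the epoch past theirs, so in-flight GPU work from the previous frame can still read them. A minimal sketch of that pattern under illustrative names:

#include <cstdint>
#include <list>
#include <memory>

struct Block {
    std::uint64_t retired_epoch = 0;
};

class DeferredDeleter {
public:
    // Mirrors BufferCache: replaced blocks are parked instead of freed on the spot.
    void Retire(std::shared_ptr<Block> block) {
        block->retired_epoch = epoch;
        pending.push_back(std::move(block));
    }

    // Called once per frame (compare BufferCache::TickFrame): anything retired at
    // least one full epoch ago can no longer be referenced by in-flight GPU work.
    void TickFrame() {
        ++epoch;
        while (!pending.empty() && pending.front()->retired_epoch + 1 <= epoch) {
            pending.pop_front(); // last shared_ptr reference dropped here
        }
    }

private:
    std::list<std::shared_ptr<Block>> pending;
    std::uint64_t epoch = 0;
};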
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
new file mode 100644
index 000000000..3a104d5cd
--- /dev/null
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -0,0 +1,89 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "video_core/gpu.h"
+
+namespace VideoCommon {
+
+class MapIntervalBase {
+public:
+    MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr)
+        : start{start}, end{end}, gpu_addr{gpu_addr} {}
+
+    void SetCpuAddress(VAddr new_cpu_addr) {
+        cpu_addr = new_cpu_addr;
+    }
+
+    VAddr GetCpuAddress() const {
+        return cpu_addr;
+    }
+
+    GPUVAddr GetGpuAddress() const {
+        return gpu_addr;
+    }
+
+    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
+        return (start <= other_start && other_end <= end);
+    }
+
+    bool operator==(const MapIntervalBase& rhs) const {
+        return std::tie(start, end) == std::tie(rhs.start, rhs.end);
+    }
+
+    bool operator!=(const MapIntervalBase& rhs) const {
+        return !operator==(rhs);
+    }
+
+    void MarkAsRegistered(const bool registered) {
+        is_registered = registered;
+    }
+
+    bool IsRegistered() const {
+        return is_registered;
+    }
+
+    CacheAddr GetStart() const {
+        return start;
+    }
+
+    CacheAddr GetEnd() const {
+        return end;
+    }
+
+    void MarkAsModified(const bool is_modified_, const u64 tick) {
+        is_modified = is_modified_;
+        ticks = tick;
+    }
+
+    bool IsModified() const {
+        return is_modified;
+    }
+
+    u64 GetModificationTick() const {
+        return ticks;
+    }
+
+    void MarkAsWritten(const bool is_written_) {
+        is_written = is_written_;
+    }
+
+    bool IsWritten() const {
+        return is_written;
+    }
+
+private:
+    CacheAddr start;
+    CacheAddr end;
+    GPUVAddr gpu_addr;
+    VAddr cpu_addr{};
+    bool is_written{};
+    bool is_modified{};
+    bool is_registered{};
+    u64 ticks{};
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 3175579cc..0094fd715 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() {
     MICROPROFILE_SCOPE(DispatchCalls);
 
     // On entering GPU code, assume all memory may be touched by the ARM core.
-    gpu.Maxwell3D().dirty_flags.OnMemoryWrite();
+    gpu.Maxwell3D().dirty.OnMemoryWrite();
 
     dma_pushbuffer_subindex = 0;
 
@@ -31,6 +31,7 @@ void DmaPusher::DispatchCalls() {
             break;
         }
     }
+    gpu.FlushCommands();
 }
 
 bool DmaPusher::Step() {
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 0ee228e28..98a8b5337 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -10,8 +10,7 @@
 
 namespace Tegra::Engines {
 
-Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
-    : rasterizer{rasterizer}, memory_manager{memory_manager} {}
+Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
 
 void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
     ASSERT_MSG(method_call.method < Regs::NUM_REGS,
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 05421d185..0901cf2fa 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -33,7 +33,7 @@ namespace Tegra::Engines {
 
 class Fermi2D final {
 public:
-    explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
+    explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer);
     ~Fermi2D() = default;
 
     /// Write the value to the register identified by method.
@@ -145,7 +145,6 @@ public:
 
 private:
     VideoCore::RasterizerInterface& rasterizer;
-    MemoryManager& memory_manager;
 
     /// Performs the copy from the source surface to the destination surface as configured in the
     /// registers.
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 7404a8163..08586d33c 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -37,7 +37,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
         const bool is_last_call = method_call.IsLastCall();
         upload_state.ProcessData(method_call.argument, is_last_call);
         if (is_last_call) {
-            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+            system.GPU().Maxwell3D().dirty.OnMemoryWrite();
         }
         break;
     }
@@ -50,13 +50,14 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
 }
 
 void KeplerCompute::ProcessLaunch() {
-
     const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
     memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
                                    LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
 
-    const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start;
-    LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc);
+    const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
+    LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);
+
+    rasterizer.DispatchCompute(code_addr);
 }
 
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 0561f676c..fa4a7c5c1 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -15,7 +15,7 @@
 namespace Tegra::Engines {
 
 KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
-    : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {}
+    : system{system}, upload_state{memory_manager, regs.upload} {}
 
 KeplerMemory::~KeplerMemory() = default;
 
@@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
         const bool is_last_call = method_call.IsLastCall();
         upload_state.ProcessData(method_call.argument, is_last_call);
         if (is_last_call) {
-            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+            system.GPU().Maxwell3D().dirty.OnMemoryWrite();
        }
         break;
     }
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index f3bc675a9..e0e25c321 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -65,7 +65,6 @@ public:
 
 private:
     Core::System& system;
-    MemoryManager& memory_manager;
     Upload::State upload_state;
 };
 
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 8755b8af4..f5158d219 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -22,6 +22,7 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste
22 MemoryManager& memory_manager) 22 MemoryManager& memory_manager)
23 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, 23 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
24 macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { 24 macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
25 InitDirtySettings();
25 InitializeRegisterDefaults(); 26 InitializeRegisterDefaults();
26} 27}
27 28
@@ -69,6 +70,10 @@ void Maxwell3D::InitializeRegisterDefaults() {
69 regs.stencil_back_func_mask = 0xFFFFFFFF; 70 regs.stencil_back_func_mask = 0xFFFFFFFF;
70 regs.stencil_back_mask = 0xFFFFFFFF; 71 regs.stencil_back_mask = 0xFFFFFFFF;
71 72
73 regs.depth_test_func = Regs::ComparisonOp::Always;
74 regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise;
75 regs.cull.cull_face = Regs::Cull::CullFace::Back;
76
72 // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a 77 // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
73 // register carrying a default value. Assume it's OpenGL's default (1). 78 // register carrying a default value. Assume it's OpenGL's default (1).
74 regs.point_size = 1.0f; 79 regs.point_size = 1.0f;
@@ -86,21 +91,168 @@ void Maxwell3D::InitializeRegisterDefaults() {
86 regs.rt_separate_frag_data = 1; 91 regs.rt_separate_frag_data = 1;
87} 92}
88 93
94#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name))
95
96void Maxwell3D::InitDirtySettings() {
97 const auto set_block = [this](const u32 start, const u32 range, const u8 position) {
98 const auto start_itr = dirty_pointers.begin() + start;
99 const auto end_itr = start_itr + range;
100 std::fill(start_itr, end_itr, position);
101 };
102 dirty.regs.fill(true);
103
104 // Init Render Targets
105 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
106 constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
107 constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
108 u32 rt_dirty_reg = DIRTY_REGS_POS(render_target);
109 for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
110 set_block(rt_reg, registers_per_rt, rt_dirty_reg);
111 rt_dirty_reg++;
112 }
113 constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
114 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
115 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag;
116 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag;
117 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
118 constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta);
119 set_block(zeta_reg, registers_in_zeta, depth_buffer_flag);
120
121 // Init Vertex Arrays
122 constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
123 constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
124 constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
125 u32 va_reg = DIRTY_REGS_POS(vertex_array);
126 u32 vi_reg = DIRTY_REGS_POS(vertex_instance);
127 for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
128 vertex_reg += vertex_array_size) {
129 set_block(vertex_reg, 3, va_reg);
130 // The divisor concerns vertex array instances
131 dirty_pointers[vertex_reg + 3] = vi_reg;
132 va_reg++;
133 vi_reg++;
134 }
135 constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
136 constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
137 constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
138 va_reg = DIRTY_REGS_POS(vertex_array);
139 for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
140 vertex_reg += vertex_limit_size) {
141 set_block(vertex_reg, vertex_limit_size, va_reg);
142 va_reg++;
143 }
144 constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
145 constexpr u32 vertex_instance_size =
146 sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
147 constexpr u32 vertex_instance_end =
148 vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
149 vi_reg = DIRTY_REGS_POS(vertex_instance);
150 for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
151 vertex_reg += vertex_instance_size) {
152 set_block(vertex_reg, vertex_instance_size, vi_reg);
153 vi_reg++;
154 }
155 set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
156 DIRTY_REGS_POS(vertex_attrib_format));
157
158 // Init Shaders
159 constexpr u32 shader_registers_count =
160 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
161 set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count,
162 DIRTY_REGS_POS(shaders));
163
164 // State
165
166 // Viewport
167 constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
168 constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
169 constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
170 set_block(viewport_start, viewport_size, viewport_dirty_reg);
171 constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control);
172 constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32);
173 set_block(view_volume_start, view_volume_size, viewport_dirty_reg);
174
175 // Viewport transformation
176 constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform);
177 constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32);
178 set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform));
179
180 // Cullmode
181 constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull);
182 constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32);
183 set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode));
184
185 // Screen y control
186 dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control);
187
188 // Primitive Restart
189 constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart);
190 constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32);
191 set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
192
193 // Depth Test
194 constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
195 dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
196 dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
197 dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
198
199 // Stencil Test
200 constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test);
201 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg;
202 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg;
203 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg;
204 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg;
205 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg;
206 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg;
207 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg;
208 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg;
209 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg;
210 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg;
211 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg;
212 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg;
213 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg;
214 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg;
215 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg;
216 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
217
218 // Color Mask
219 constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
220 dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
221 set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
222 color_mask_dirty_reg);
223 // Blend State
224 constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
225 set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
226 blend_state_dirty_reg);
227 dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
228 set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg);
229 set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32),
230 blend_state_dirty_reg);
231
232 // Scissor State
233 constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
234 set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
235 scissor_test_dirty_reg);
236
237 // Polygon Offset
238 constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
239 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
240 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
241 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
242 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg;
243 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg;
244 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
245}
246
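InitDirtySettings above builds a lookup table: every Maxwell3D register index maps to one slot in a flat dirty-flag array, and set_block assigns a whole contiguous register range to the same slot. A minimal self-contained sketch of the idea (names and sizes are illustrative, not yuzu's):

    #include <array>
    #include <cstdint>
    #include <cstdio>

    int main() {
        constexpr std::size_t num_regs = 16;                  // stand-in for Regs::NUM_REGS
        std::array<std::uint8_t, num_regs> dirty_pointers{};  // register -> dirty slot (0 = none)
        std::array<bool, 8> dirty_flags{};                    // stand-in for DirtyRegs::regs

        const auto set_block = [&](std::size_t start, std::size_t count, std::uint8_t slot) {
            for (std::size_t i = 0; i < count; ++i) {
                dirty_pointers[start + i] = slot;
            }
        };

        set_block(4, 6, 2); // e.g. every viewport register shares dirty slot 2

        // A register write then becomes a single table lookup instead of chained range checks:
        const std::size_t method = 7;
        if (const std::uint8_t slot = dirty_pointers[method]) {
            dirty_flags[slot] = true;
        }
        std::printf("slot 2 dirty: %d\n", dirty_flags[2]);
    }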
89void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { 247void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
90 // Reset the current macro. 248 // Reset the current macro.
91 executing_macro = 0; 249 executing_macro = 0;
92 250
93 // Lookup the macro offset 251 // Lookup the macro offset
94 const u32 entry{(method - MacroRegistersStart) >> 1}; 252 const u32 entry = ((method - MacroRegistersStart) >> 1) % macro_positions.size();
95 const auto& search{macro_offsets.find(entry)};
96 if (search == macro_offsets.end()) {
97 LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method);
98 UNREACHABLE();
99 return;
100 }
101 253
102 // Execute the current macro. 254 // Execute the current macro.
103 macro_interpreter.Execute(search->second, std::move(parameters)); 255 macro_interpreter.Execute(macro_positions[entry], std::move(parameters));
104} 256}
105 257
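With macro_positions now a fixed 0x80-entry array, the lookup in CallMacroMethod can no longer miss: the entry index is derived from the method offset (macro methods arrive as call/argument pairs, hence the >> 1) and wrapped by the table size. A standalone sketch of that index math; MacroRegistersStart = 0xE00 is an assumed value for illustration, as the constant is not shown in this hunk:

    #include <array>
    #include <cstdint>
    #include <cstdio>

    int main() {
        constexpr std::uint32_t MacroRegistersStart = 0xE00; // assumption for illustration
        std::array<std::uint32_t, 0x80> macro_positions{};
        macro_positions[3] = 0x1234; // filled in earlier by ProcessMacroBind

        const std::uint32_t method = MacroRegistersStart + (3 << 1);
        const std::uint32_t entry =
            ((method - MacroRegistersStart) >> 1) % macro_positions.size();
        std::printf("macro entry %u starts at offset 0x%X\n", entry, macro_positions[entry]);
    }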
106void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { 258void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
@@ -108,6 +260,14 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
108 260
109 const u32 method = method_call.method; 261 const u32 method = method_call.method;
110 262
263 if (method == cb_data_state.current) {
264 regs.reg_array[method] = method_call.argument;
265 ProcessCBData(method_call.argument);
266 return;
267 } else if (cb_data_state.current != null_cb_data) {
268 FinishCBData();
269 }
270
111 // It is an error to write to a register other than the current macro's ARG register before it 271 // It is an error to write to a register other than the current macro's ARG register before it
112 // has finished execution. 272 // has finished execution.
113 if (executing_macro != 0) { 273 if (executing_macro != 0) {
@@ -143,49 +303,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
143 303
144 if (regs.reg_array[method] != method_call.argument) { 304 if (regs.reg_array[method] != method_call.argument) {
145 regs.reg_array[method] = method_call.argument; 305 regs.reg_array[method] = method_call.argument;
146 // Color buffers 306 const std::size_t dirty_reg = dirty_pointers[method];
147 constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt); 307 if (dirty_reg) {
148 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); 308 dirty.regs[dirty_reg] = true;
149 if (method >= first_rt_reg && 309 if (dirty_reg >= DIRTY_REGS_POS(vertex_array) &&
150 method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) { 310 dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) {
151 const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt; 311 dirty.vertex_array_buffers = true;
152 dirty_flags.color_buffer.set(rt_index); 312 } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) &&
153 } 313 dirty_reg < DIRTY_REGS_POS(vertex_instances)) {
154 314 dirty.vertex_instances = true;
155 // Zeta buffer 315 } else if (dirty_reg >= DIRTY_REGS_POS(render_target) &&
156 constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); 316 dirty_reg < DIRTY_REGS_POS(render_settings)) {
157 if (method == MAXWELL3D_REG_INDEX(zeta_enable) || 317 dirty.render_settings = true;
158 method == MAXWELL3D_REG_INDEX(zeta_width) || 318 }
159 method == MAXWELL3D_REG_INDEX(zeta_height) ||
160 (method >= MAXWELL3D_REG_INDEX(zeta) &&
161 method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
162 dirty_flags.zeta_buffer = true;
163 }
164
165 // Shader
166 constexpr u32 shader_registers_count =
167 sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
168 if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
169 method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
170 dirty_flags.shaders = true;
171 }
172
173 // Vertex format
174 if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
175 method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
176 dirty_flags.vertex_attrib_format = true;
177 }
178
179 // Vertex buffer
180 if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
181 method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) {
182 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
183 } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
184 method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) {
185 dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
186 } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
187 method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) {
188 dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
189 } 319 }
190 } 320 }
191 321
@@ -214,7 +344,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
214 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]): 344 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
215 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]): 345 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
216 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): { 346 case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
217 ProcessCBData(method_call.argument); 347 StartCBData(method);
218 break; 348 break;
219 } 349 }
220 case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): { 350 case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): {
@@ -249,6 +379,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
249 ProcessQueryGet(); 379 ProcessQueryGet();
250 break; 380 break;
251 } 381 }
382 case MAXWELL3D_REG_INDEX(condition.mode): {
383 ProcessQueryCondition();
384 break;
385 }
252 case MAXWELL3D_REG_INDEX(sync_info): { 386 case MAXWELL3D_REG_INDEX(sync_info): {
253 ProcessSyncPoint(); 387 ProcessSyncPoint();
254 break; 388 break;
@@ -261,7 +395,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
261 const bool is_last_call = method_call.IsLastCall(); 395 const bool is_last_call = method_call.IsLastCall();
262 upload_state.ProcessData(method_call.argument, is_last_call); 396 upload_state.ProcessData(method_call.argument, is_last_call);
263 if (is_last_call) { 397 if (is_last_call) {
264 dirty_flags.OnMemoryWrite(); 398 dirty.OnMemoryWrite();
265 } 399 }
266 break; 400 break;
267 } 401 }
@@ -281,7 +415,7 @@ void Maxwell3D::ProcessMacroUpload(u32 data) {
281} 415}
282 416
283void Maxwell3D::ProcessMacroBind(u32 data) { 417void Maxwell3D::ProcessMacroBind(u32 data) {
284 macro_offsets[regs.macros.entry] = data; 418 macro_positions[regs.macros.entry++] = data;
285} 419}
286 420
287void Maxwell3D::ProcessQueryGet() { 421void Maxwell3D::ProcessQueryGet() {
@@ -302,6 +436,7 @@ void Maxwell3D::ProcessQueryGet() {
302 result = regs.query.query_sequence; 436 result = regs.query.query_sequence;
303 break; 437 break;
304 default: 438 default:
439 result = 1;
305 UNIMPLEMENTED_MSG("Unimplemented query select type {}", 440 UNIMPLEMENTED_MSG("Unimplemented query select type {}",
306 static_cast<u32>(regs.query.query_get.select.Value())); 441 static_cast<u32>(regs.query.query_get.select.Value()));
307 } 442 }
@@ -333,7 +468,6 @@ void Maxwell3D::ProcessQueryGet() {
333 query_result.timestamp = system.CoreTiming().GetTicks(); 468 query_result.timestamp = system.CoreTiming().GetTicks();
334 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); 469 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
335 } 470 }
336 dirty_flags.OnMemoryWrite();
337 break; 471 break;
338 } 472 }
339 default: 473 default:
@@ -342,12 +476,52 @@ void Maxwell3D::ProcessQueryGet() {
342 } 476 }
343} 477}
344 478
479void Maxwell3D::ProcessQueryCondition() {
480 const GPUVAddr condition_address{regs.condition.Address()};
481 switch (regs.condition.mode) {
482 case Regs::ConditionMode::Always: {
483 execute_on = true;
484 break;
485 }
486 case Regs::ConditionMode::Never: {
487 execute_on = false;
488 break;
489 }
490 case Regs::ConditionMode::ResNonZero: {
491 Regs::QueryCompare cmp;
492 memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
493 execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
494 break;
495 }
496 case Regs::ConditionMode::Equal: {
497 Regs::QueryCompare cmp;
498 memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
499 execute_on =
500 cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
501 break;
502 }
503 case Regs::ConditionMode::NotEqual: {
504 Regs::QueryCompare cmp;
505 memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
506 execute_on =
507 cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
508 break;
509 }
510 default: {
511 UNIMPLEMENTED_MSG("Unimplemented condition mode!");
512 execute_on = true;
513 break;
514 }
515 }
516}
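The new ProcessQueryCondition reads a QueryCompare record from GPU memory and folds it into a single execute_on flag, which later draws consult through ShouldExecute(). A host-only model of the five modes, with a plain struct standing in for the ReadBlockUnsafe from GPU memory (the layout mirrors the QueryCompare added to the header below):

    #include <cstdint>
    #include <cstdio>

    enum class ConditionMode : std::uint32_t { Never, Always, ResNonZero, Equal, NotEqual };

    struct QueryCompare {
        std::uint32_t initial_sequence, initial_mode;
        std::uint32_t unknown1, unknown2;
        std::uint32_t current_sequence, current_mode;
    };

    bool Evaluate(ConditionMode mode, const QueryCompare& cmp) {
        switch (mode) {
        case ConditionMode::Always:
            return true;
        case ConditionMode::Never:
            return false;
        case ConditionMode::ResNonZero:
            return cmp.initial_sequence != 0 && cmp.initial_mode != 0;
        case ConditionMode::Equal:
            return cmp.initial_sequence == cmp.current_sequence &&
                   cmp.initial_mode == cmp.current_mode;
        case ConditionMode::NotEqual:
            return cmp.initial_sequence != cmp.current_sequence ||
                   cmp.initial_mode != cmp.current_mode;
        }
        return true; // unknown modes default to executing, as in the diff
    }

    int main() {
        const QueryCompare cmp{1, 1, 0, 0, 1, 1};
        std::printf("Equal -> %d\n", Evaluate(ConditionMode::Equal, cmp));
    }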
517
345void Maxwell3D::ProcessSyncPoint() { 518void Maxwell3D::ProcessSyncPoint() {
346 const u32 sync_point = regs.sync_info.sync_point.Value(); 519 const u32 sync_point = regs.sync_info.sync_point.Value();
347 const u32 increment = regs.sync_info.increment.Value(); 520 const u32 increment = regs.sync_info.increment.Value();
348 const u32 cache_flush = regs.sync_info.unknown.Value(); 521 [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
349 LOG_DEBUG(HW_GPU, "Syncpoint set {}, increment: {}, unk: {}", sync_point, increment, 522 if (increment) {
350 cache_flush); 523 system.GPU().IncrementSyncPoint(sync_point);
524 }
351} 525}
352 526
353void Maxwell3D::DrawArrays() { 527void Maxwell3D::DrawArrays() {
@@ -405,23 +579,39 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
405} 579}
406 580
407void Maxwell3D::ProcessCBData(u32 value) { 581void Maxwell3D::ProcessCBData(u32 value) {
582 const u32 id = cb_data_state.id;
583 cb_data_state.buffer[id][cb_data_state.counter] = value;
584 // Increment the current buffer position.
585 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
586 cb_data_state.counter++;
587}
588
589void Maxwell3D::StartCBData(u32 method) {
590 constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
591 cb_data_state.start_pos = regs.const_buffer.cb_pos;
592 cb_data_state.id = method - first_cb_data;
593 cb_data_state.current = method;
594 cb_data_state.counter = 0;
595 ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
596}
597
598void Maxwell3D::FinishCBData() {
408 // Write the input value to the current const buffer at the current position. 599 // Flush the buffered CB_DATA words to the const buffer in GPU memory.
409 const GPUVAddr buffer_address = regs.const_buffer.BufferAddress(); 600 const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
410 ASSERT(buffer_address != 0); 601 ASSERT(buffer_address != 0);
411 602
412 // Don't allow writing past the end of the buffer. 603 // Don't allow writing past the end of the buffer.
413 ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); 604 ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);
414 605
415 const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos}; 606 const GPUVAddr address{buffer_address + cb_data_state.start_pos};
607 const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;
416 608
417 u8* ptr{memory_manager.GetPointer(address)}; 609 const u32 id = cb_data_state.id;
418 rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); 610 memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
419 memory_manager.Write<u32>(address, value); 611 dirty.OnMemoryWrite();
420 612
421 dirty_flags.OnMemoryWrite(); 613 cb_data_state.id = null_cb_data;
422 614 cb_data_state.current = null_cb_data;
423 // Increment the current buffer position.
424 regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
425} 615}
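StartCBData/ProcessCBData/FinishCBData replace the old per-word InvalidateRegion + Write<u32> with batching: consecutive CB_DATA writes accumulate in a host staging buffer, and one WriteBlock flushes them when the first unrelated method arrives in CallMethod. A reduced model of that state machine, with sizes shortened and a vector standing in for GPU memory:

    #include <array>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct CBDataState {
        static constexpr std::uint32_t null = 0xFFFFFFFF;
        std::array<std::uint32_t, 8> buffer{};
        std::uint32_t current = null; // method currently being batched
        std::uint32_t counter = 0;
    };

    int main() {
        CBDataState state;
        std::vector<std::uint32_t> gpu_memory(8);

        const auto start = [&](std::uint32_t method) {
            state.current = method;
            state.counter = 0;
        };
        const auto process = [&](std::uint32_t value) { state.buffer[state.counter++] = value; };
        const auto finish = [&]() {
            for (std::uint32_t i = 0; i < state.counter; ++i) {
                gpu_memory[i] = state.buffer[i]; // one bulk write instead of N invalidations
            }
            state.current = CBDataState::null;
        };

        start(0x6C4); // hypothetical CB_DATA[0] method id
        for (const std::uint32_t v : {1u, 2u, 3u}) {
            process(v);
        }
        finish(); // triggered by the first non-CB_DATA method
        std::printf("flushed %u words; gpu_memory[2] = %u\n", state.counter, gpu_memory[2]);
    }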
426 616
427Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { 617Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
@@ -430,10 +620,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
430 Texture::TICEntry tic_entry; 620 Texture::TICEntry tic_entry;
431 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); 621 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
432 622
433 const auto r_type{tic_entry.r_type.Value()}; 623 [[maybe_unused]] const auto r_type{tic_entry.r_type.Value()};
434 const auto g_type{tic_entry.g_type.Value()}; 624 [[maybe_unused]] const auto g_type{tic_entry.g_type.Value()};
435 const auto b_type{tic_entry.b_type.Value()}; 625 [[maybe_unused]] const auto b_type{tic_entry.b_type.Value()};
436 const auto a_type{tic_entry.a_type.Value()}; 626 [[maybe_unused]] const auto a_type{tic_entry.a_type.Value()};
437 627
438 // TODO(Subv): Different data types for separate components are not supported 628 // TODO(Subv): Different data types for separate components are not supported
439 DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); 629 DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 13e314944..0184342a0 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -67,6 +67,7 @@ public:
67 static constexpr std::size_t MaxShaderStage = 5; 67 static constexpr std::size_t MaxShaderStage = 5;
68 // Maximum number of const buffers per shader stage. 68 // Maximum number of const buffers per shader stage.
69 static constexpr std::size_t MaxConstBuffers = 18; 69 static constexpr std::size_t MaxConstBuffers = 18;
70 static constexpr std::size_t MaxConstBufferSize = 0x10000;
70 71
71 enum class QueryMode : u32 { 72 enum class QueryMode : u32 {
72 Write = 0, 73 Write = 0,
@@ -89,6 +90,20 @@ public:
89 90
90 enum class QuerySelect : u32 { 91 enum class QuerySelect : u32 {
91 Zero = 0, 92 Zero = 0,
93 TimeElapsed = 2,
94 TransformFeedbackPrimitivesGenerated = 11,
95 PrimitivesGenerated = 18,
96 SamplesPassed = 21,
97 TransformFeedbackUnknown = 26,
98 };
99
100 struct QueryCompare {
101 u32 initial_sequence;
102 u32 initial_mode;
103 u32 unknown1;
104 u32 unknown2;
105 u32 current_sequence;
106 u32 current_mode;
92 }; 107 };
93 108
94 enum class QuerySyncCondition : u32 { 109 enum class QuerySyncCondition : u32 {
@@ -96,6 +111,14 @@ public:
96 GreaterThan = 1, 111 GreaterThan = 1,
97 }; 112 };
98 113
114 enum class ConditionMode : u32 {
115 Never = 0,
116 Always = 1,
117 ResNonZero = 2,
118 Equal = 3,
119 NotEqual = 4,
120 };
121
99 enum class ShaderProgram : u32 { 122 enum class ShaderProgram : u32 {
100 VertexA = 0, 123 VertexA = 0,
101 VertexB = 1, 124 VertexB = 1,
@@ -814,7 +837,18 @@ public:
814 BitField<4, 1, u32> alpha_to_one; 837 BitField<4, 1, u32> alpha_to_one;
815 } multisample_control; 838 } multisample_control;
816 839
817 INSERT_PADDING_WORDS(0x7); 840 INSERT_PADDING_WORDS(0x4);
841
842 struct {
843 u32 address_high;
844 u32 address_low;
845 ConditionMode mode;
846
847 GPUVAddr Address() const {
848 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
849 address_low);
850 }
851 } condition;
818 852
819 struct { 853 struct {
820 u32 tsc_address_high; 854 u32 tsc_address_high;
@@ -1123,23 +1157,77 @@ public:
1123 1157
1124 State state{}; 1158 State state{};
1125 1159
1126 struct DirtyFlags { 1160 struct DirtyRegs {
1127 std::bitset<8> color_buffer{0xFF}; 1161 static constexpr std::size_t NUM_REGS = 256;
1128 std::bitset<32> vertex_array{0xFFFFFFFF}; 1162 union {
1163 struct {
1164 bool null_dirty;
1165
1166 // Vertex Attributes
1167 bool vertex_attrib_format;
1168
1169 // Vertex Arrays
1170 std::array<bool, 32> vertex_array;
1171
1172 bool vertex_array_buffers;
1173
1174 // Vertex Instances
1175 std::array<bool, 32> vertex_instance;
1129 1176
1130 bool vertex_attrib_format = true; 1177 bool vertex_instances;
1131 bool zeta_buffer = true; 1178
1132 bool shaders = true; 1179 // Render Targets
1180 std::array<bool, 8> render_target;
1181 bool depth_buffer;
1182
1183 bool render_settings;
1184
1185 // Shaders
1186 bool shaders;
1187
1188 // Rasterizer State
1189 bool viewport;
1190 bool clip_coefficient;
1191 bool cull_mode;
1192 bool primitive_restart;
1193 bool depth_test;
1194 bool stencil_test;
1195 bool blend_state;
1196 bool scissor_test;
1197 bool transform_feedback;
1198 bool color_mask;
1199 bool polygon_offset;
1200
1201 // Complementary
1202 bool viewport_transform;
1203 bool screen_y_control;
1204
1205 bool memory_general;
1206 };
1207 std::array<bool, NUM_REGS> regs;
1208 };
1209
1210 void ResetVertexArrays() {
1211 vertex_array.fill(true);
1212 vertex_array_buffers = true;
1213 }
1214
1215 void ResetRenderTargets() {
1216 depth_buffer = true;
1217 render_target.fill(true);
1218 render_settings = true;
1219 }
1133 1220
1134 void OnMemoryWrite() { 1221 void OnMemoryWrite() {
1135 zeta_buffer = true;
1136 shaders = true; 1222 shaders = true;
1137 color_buffer.set(); 1223 memory_general = true;
1138 vertex_array.set(); 1224 ResetRenderTargets();
1225 ResetVertexArrays();
1139 } 1226 }
1140 };
1141 1227
1142 DirtyFlags dirty_flags; 1228 } dirty{};
1229
1230 std::array<u8, Regs::NUM_REGS> dirty_pointers{};
1143 1231
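DirtyRegs overlays named flags on a flat array, so CallMethod can set a flag by table index (via dirty_pointers) while the rasterizer reads it by name. A trimmed sketch of the pattern; like the original it leans on anonymous structs and union aliasing, which the mainstream compilers accept as an extension:

    #include <array>
    #include <cstdio>

    struct Dirty {
        static constexpr std::size_t NUM = 4;
        union {
            struct { // anonymous struct: non-standard, but accepted by GCC/Clang/MSVC
                bool null_flag;
                bool viewport;
                bool cull_mode;
                bool depth_test;
            };
            std::array<bool, NUM> regs;
        };
    };

    int main() {
        Dirty dirty{};
        dirty.regs[1] = true;                          // indexed write from the dirty table...
        std::printf("viewport: %d\n", dirty.viewport); // ...named read at draw time
    }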
1144 /// Reads a register value located at the input method address 1232 /// Reads a register value located at the input method address
1145 u32 GetRegisterValue(u32 method) const; 1233 u32 GetRegisterValue(u32 method) const;
@@ -1168,6 +1256,10 @@ public:
1168 return macro_memory; 1256 return macro_memory;
1169 } 1257 }
1170 1258
1259 bool ShouldExecute() const {
1260 return execute_on;
1261 }
1262
1171private: 1263private:
1172 void InitializeRegisterDefaults(); 1264 void InitializeRegisterDefaults();
1173 1265
@@ -1178,7 +1270,7 @@ private:
1178 MemoryManager& memory_manager; 1270 MemoryManager& memory_manager;
1179 1271
1180 /// Start offsets of each macro in macro_memory 1272 /// Start offsets of each macro in macro_memory
1181 std::unordered_map<u32, u32> macro_offsets; 1273 std::array<u32, 0x80> macro_positions = {};
1182 1274
1183 /// Memory for macro code 1275 /// Memory for macro code
1184 MacroMemory macro_memory; 1276 MacroMemory macro_memory;
@@ -1191,14 +1283,27 @@ private:
1191 /// Interpreter for the macro codes uploaded to the GPU. 1283 /// Interpreter for the macro codes uploaded to the GPU.
1192 MacroInterpreter macro_interpreter; 1284 MacroInterpreter macro_interpreter;
1193 1285
1286 static constexpr u32 null_cb_data = 0xFFFFFFFF;
1287 struct {
1288 std::array<std::array<u32, 0x4000>, 16> buffer;
1289 u32 current{null_cb_data};
1290 u32 id{null_cb_data};
1291 u32 start_pos{};
1292 u32 counter{};
1293 } cb_data_state;
1294
1194 Upload::State upload_state; 1295 Upload::State upload_state;
1195 1296
1297 bool execute_on{true};
1298
1196 /// Retrieves information about a specific TIC entry from the TIC buffer. 1299 /// Retrieves information about a specific TIC entry from the TIC buffer.
1197 Texture::TICEntry GetTICEntry(u32 tic_index) const; 1300 Texture::TICEntry GetTICEntry(u32 tic_index) const;
1198 1301
1199 /// Retrieves information about a specific TSC entry from the TSC buffer. 1302 /// Retrieves information about a specific TSC entry from the TSC buffer.
1200 Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; 1303 Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
1201 1304
1305 void InitDirtySettings();
1306
1202 /** 1307 /**
1203 * Call a macro on this engine. 1308 * Call a macro on this engine.
1204 * @param method Method to call 1309 * @param method Method to call
@@ -1218,11 +1323,16 @@ private:
1218 /// Handles a write to the QUERY_GET register. 1323 /// Handles a write to the QUERY_GET register.
1219 void ProcessQueryGet(); 1324 void ProcessQueryGet();
1220 1325
1326 /// Handles a write to the condition register (conditional rendering).
1327 void ProcessQueryCondition();
1328
1221 /// Handles writes to syncing register. 1329 /// Handles writes to syncing register.
1222 void ProcessSyncPoint(); 1330 void ProcessSyncPoint();
1223 1331
1224 /// Handles a write to the CB_DATA[i] register. 1332 /// Handles a write to the CB_DATA[i] register.
1333 void StartCBData(u32 method);
1225 void ProcessCBData(u32 value); 1334 void ProcessCBData(u32 value);
1335 void FinishCBData();
1226 1336
1227 /// Handles a write to the CB_BIND register. 1337 /// Handles a write to the CB_BIND register.
1228 void ProcessCBBind(Regs::ShaderStage stage); 1338 void ProcessCBBind(Regs::ShaderStage stage);
@@ -1289,6 +1399,7 @@ ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
1289ASSERT_REG_POSITION(point_size, 0x546); 1399ASSERT_REG_POSITION(point_size, 0x546);
1290ASSERT_REG_POSITION(zeta_enable, 0x54E); 1400ASSERT_REG_POSITION(zeta_enable, 0x54E);
1291ASSERT_REG_POSITION(multisample_control, 0x54F); 1401ASSERT_REG_POSITION(multisample_control, 0x54F);
1402ASSERT_REG_POSITION(condition, 0x554);
1292ASSERT_REG_POSITION(tsc, 0x557); 1403ASSERT_REG_POSITION(tsc, 0x557);
1293ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); 1404ASSERT_REG_POSITION(polygon_offset_factor, 0x55b);
1294ASSERT_REG_POSITION(tic, 0x55D); 1405ASSERT_REG_POSITION(tic, 0x55D);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index afb9578d0..ad8453c5f 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -5,18 +5,17 @@
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "core/core.h" 7#include "core/core.h"
8#include "core/settings.h"
8#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
9#include "video_core/engines/maxwell_dma.h" 10#include "video_core/engines/maxwell_dma.h"
10#include "video_core/memory_manager.h" 11#include "video_core/memory_manager.h"
11#include "video_core/rasterizer_interface.h"
12#include "video_core/renderer_base.h" 12#include "video_core/renderer_base.h"
13#include "video_core/textures/decoders.h" 13#include "video_core/textures/decoders.h"
14 14
15namespace Tegra::Engines { 15namespace Tegra::Engines {
16 16
17MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 17MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager)
18 MemoryManager& memory_manager) 18 : system{system}, memory_manager{memory_manager} {}
19 : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
20 19
21void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { 20void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
22 ASSERT_MSG(method_call.method < Regs::NUM_REGS, 21 ASSERT_MSG(method_call.method < Regs::NUM_REGS,
@@ -38,7 +37,7 @@ void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
38} 37}
39 38
40void MaxwellDMA::HandleCopy() { 39void MaxwellDMA::HandleCopy() {
41 LOG_WARNING(HW_GPU, "Requested a DMA copy"); 40 LOG_TRACE(HW_GPU, "Requested a DMA copy");
42 41
43 const GPUVAddr source = regs.src_address.Address(); 42 const GPUVAddr source = regs.src_address.Address();
44 const GPUVAddr dest = regs.dst_address.Address(); 43 const GPUVAddr dest = regs.dst_address.Address();
@@ -58,7 +57,7 @@ void MaxwellDMA::HandleCopy() {
58 } 57 }
59 58
60 // All copies here update the main memory, so mark all rasterizer states as invalid. 59 // All copies here update the main memory, so mark all rasterizer states as invalid.
61 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 60 system.GPU().Maxwell3D().dirty.OnMemoryWrite();
62 61
63 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { 62 if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
64 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D 63 // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
@@ -84,13 +83,17 @@ void MaxwellDMA::HandleCopy() {
84 ASSERT(regs.exec.enable_2d == 1); 83 ASSERT(regs.exec.enable_2d == 1);
85 84
86 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 85 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
87 ASSERT(regs.src_params.size_z == 1); 86 ASSERT(regs.src_params.BlockDepth() == 0);
88 // If the input is tiled and the output is linear, deswizzle the input and copy it over. 87 // If the input is tiled and the output is linear, deswizzle the input and copy it over.
89 const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x; 88 const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
90 const std::size_t src_size = Texture::CalculateSize( 89 const std::size_t src_size = Texture::CalculateSize(
91 true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, 90 true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
92 regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth()); 91 regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
93 92
93 const std::size_t src_layer_size = Texture::CalculateSize(
94 true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, 1,
95 regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
96
94 const std::size_t dst_size = regs.dst_pitch * regs.y_count; 97 const std::size_t dst_size = regs.dst_pitch * regs.y_count;
95 98
96 if (read_buffer.size() < src_size) { 99 if (read_buffer.size() < src_size) {
@@ -104,23 +107,23 @@ void MaxwellDMA::HandleCopy() {
104 memory_manager.ReadBlock(source, read_buffer.data(), src_size); 107 memory_manager.ReadBlock(source, read_buffer.data(), src_size);
105 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); 108 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
106 109
107 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, 110 Texture::UnswizzleSubrect(
108 regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(), 111 regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel,
109 write_buffer.data(), regs.src_params.BlockHeight(), 112 read_buffer.data() + src_layer_size * regs.src_params.pos_z, write_buffer.data(),
110 regs.src_params.pos_x, regs.src_params.pos_y); 113 regs.src_params.BlockHeight(), regs.src_params.pos_x, regs.src_params.pos_y);
111 114
112 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); 115 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
113 } else { 116 } else {
114 ASSERT(regs.dst_params.BlockDepth() == 0); 117 ASSERT(regs.dst_params.BlockDepth() == 0);
115 118
116 const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count; 119 const u32 bytes_per_pixel = regs.src_pitch / regs.x_count;
117 120
118 const std::size_t dst_size = Texture::CalculateSize( 121 const std::size_t dst_size = Texture::CalculateSize(
119 true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 122 true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
120 regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); 123 regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
121 124
122 const std::size_t dst_layer_size = Texture::CalculateSize( 125 const std::size_t dst_layer_size = Texture::CalculateSize(
123 true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1, 126 true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
124 regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth()); 127 regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
125 128
126 const std::size_t src_size = regs.src_pitch * regs.y_count; 129 const std::size_t src_size = regs.src_pitch * regs.y_count;
@@ -133,14 +136,19 @@ void MaxwellDMA::HandleCopy() {
133 write_buffer.resize(dst_size); 136 write_buffer.resize(dst_size);
134 } 137 }
135 138
136 memory_manager.ReadBlock(source, read_buffer.data(), src_size); 139 if (Settings::values.use_accurate_gpu_emulation) {
137 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); 140 memory_manager.ReadBlock(source, read_buffer.data(), src_size);
141 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
142 } else {
143 memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size);
144 memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
145 }
138 146
139 // If the input is linear and the output is tiled, swizzle the input and copy it over. 147 // If the input is linear and the output is tiled, swizzle the input and copy it over.
140 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, 148 Texture::SwizzleSubrect(
141 src_bytes_per_pixel, 149 regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, bytes_per_pixel,
142 write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, 150 write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, read_buffer.data(),
143 read_buffer.data(), regs.dst_params.BlockHeight()); 151 regs.dst_params.BlockHeight(), regs.dst_params.pos_x, regs.dst_params.pos_y);
144 152
145 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); 153 memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
146 } 154 }
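The deswizzle path now derives bytes-per-pixel from the destination pitch and, more importantly, offsets the read cursor by whole source layers (src_layer_size * pos_z) before unswizzling, so copies out of a 3D source no longer always read layer 0. The offset arithmetic in isolation, with made-up dimensions and a linear layer size in place of the swizzled CalculateSize:

    #include <cstddef>
    #include <cstdio>

    int main() {
        const std::size_t bytes_per_pixel = 4;      // dst_pitch / x_count in the diff
        const std::size_t size_x = 64, size_y = 64; // made-up source dimensions
        const std::size_t layer_size = size_x * size_y * bytes_per_pixel;
        const std::size_t pos_z = 3;                // source layer being copied

        // Mirrors read_buffer.data() + src_layer_size * regs.src_params.pos_z:
        const std::size_t src_offset = layer_size * pos_z;
        std::printf("deswizzle reads %zu bytes into the buffer\n", src_offset);
    }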
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 17b015ca7..93808a9bb 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -20,10 +20,6 @@ namespace Tegra {
20class MemoryManager; 20class MemoryManager;
21} 21}
22 22
23namespace VideoCore {
24class RasterizerInterface;
25}
26
27namespace Tegra::Engines { 23namespace Tegra::Engines {
28 24
29/** 25/**
@@ -33,8 +29,7 @@ namespace Tegra::Engines {
33 29
34class MaxwellDMA final { 30class MaxwellDMA final {
35public: 31public:
36 explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 32 explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager);
37 MemoryManager& memory_manager);
38 ~MaxwellDMA() = default; 33 ~MaxwellDMA() = default;
39 34
40 /// Write the value to the register identified by method. 35 /// Write the value to the register identified by method.
@@ -180,8 +175,6 @@ public:
180private: 175private:
181 Core::System& system; 176 Core::System& system;
182 177
183 VideoCore::RasterizerInterface& rasterizer;
184
185 MemoryManager& memory_manager; 178 MemoryManager& memory_manager;
186 179
187 std::vector<u8> read_buffer; 180 std::vector<u8> read_buffer;
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 404d4f5aa..c3678b9ea 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -78,7 +78,7 @@ union Attribute {
78 constexpr explicit Attribute(u64 value) : value(value) {} 78 constexpr explicit Attribute(u64 value) : value(value) {}
79 79
80 enum class Index : u64 { 80 enum class Index : u64 {
81 PointSize = 6, 81 LayerViewportPointSize = 6,
82 Position = 7, 82 Position = 7,
83 Attribute_0 = 8, 83 Attribute_0 = 8,
84 Attribute_31 = 39, 84 Attribute_31 = 39,
@@ -538,6 +538,12 @@ enum class PhysicalAttributeDirection : u64 {
538 Output = 1, 538 Output = 1,
539}; 539};
540 540
541enum class VoteOperation : u64 {
542 All = 0, // allThreadsNV
543 Any = 1, // anyThreadNV
544 Eq = 2, // allThreadsEqualNV
545};
546
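The comments map each VoteOperation to the NV_shader_thread_group GLSL intrinsic named alongside it. As a host-side reference for the semantics, over a hypothetical 4-thread warp:

    #include <array>
    #include <cstdio>

    enum class VoteOperation { All, Any, Eq };

    bool Vote(VoteOperation op, const std::array<bool, 4>& preds) {
        bool all = true;
        bool any = false;
        for (const bool p : preds) {
            all = all && p;
            any = any || p;
        }
        switch (op) {
        case VoteOperation::All:
            return all; // allThreadsNV
        case VoteOperation::Any:
            return any; // anyThreadNV
        case VoteOperation::Eq:
            return all || !any; // allThreadsEqualNV: uniformly true or uniformly false
        }
        return false;
    }

    int main() {
        std::printf("Eq over {1,0,1,1}: %d\n", Vote(VoteOperation::Eq, {true, false, true, true}));
        std::printf("Eq over {1,1,1,1}: %d\n", Vote(VoteOperation::Eq, {true, true, true, true}));
    }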
541union Instruction { 547union Instruction {
542 Instruction& operator=(const Instruction& instr) { 548 Instruction& operator=(const Instruction& instr) {
543 value = instr.value; 549 value = instr.value;
@@ -560,6 +566,18 @@ union Instruction {
560 BitField<48, 16, u64> opcode; 566 BitField<48, 16, u64> opcode;
561 567
562 union { 568 union {
569 BitField<8, 5, ConditionCode> cc;
570 BitField<13, 1, u64> trigger;
571 } nop;
572
573 union {
574 BitField<48, 2, VoteOperation> operation;
575 BitField<45, 3, u64> dest_pred;
576 BitField<39, 3, u64> value;
577 BitField<42, 1, u64> negate_value;
578 } vote;
579
580 union {
563 BitField<8, 8, Register> gpr; 581 BitField<8, 8, Register> gpr;
564 BitField<20, 24, s64> offset; 582 BitField<20, 24, s64> offset;
565 } gmem; 583 } gmem;
@@ -868,6 +886,7 @@ union Instruction {
868 union { 886 union {
869 BitField<0, 3, u64> pred0; 887 BitField<0, 3, u64> pred0;
870 BitField<3, 3, u64> pred3; 888 BitField<3, 3, u64> pred3;
889 BitField<6, 1, u64> neg_b;
871 BitField<7, 1, u64> abs_a; 890 BitField<7, 1, u64> abs_a;
872 BitField<39, 3, u64> pred39; 891 BitField<39, 3, u64> pred39;
873 BitField<42, 1, u64> neg_pred; 892 BitField<42, 1, u64> neg_pred;
@@ -931,8 +950,6 @@ union Instruction {
931 } csetp; 950 } csetp;
932 951
933 union { 952 union {
934 BitField<35, 4, PredCondition> cond;
935 BitField<49, 1, u64> h_and;
936 BitField<6, 1, u64> ftz; 953 BitField<6, 1, u64> ftz;
937 BitField<45, 2, PredOperation> op; 954 BitField<45, 2, PredOperation> op;
938 BitField<3, 3, u64> pred3; 955 BitField<3, 3, u64> pred3;
@@ -940,9 +957,21 @@ union Instruction {
940 BitField<43, 1, u64> negate_a; 957 BitField<43, 1, u64> negate_a;
941 BitField<44, 1, u64> abs_a; 958 BitField<44, 1, u64> abs_a;
942 BitField<47, 2, HalfType> type_a; 959 BitField<47, 2, HalfType> type_a;
943 BitField<31, 1, u64> negate_b; 960 union {
944 BitField<30, 1, u64> abs_b; 961 BitField<35, 4, PredCondition> cond;
945 BitField<28, 2, HalfType> type_b; 962 BitField<49, 1, u64> h_and;
963 BitField<31, 1, u64> negate_b;
964 BitField<30, 1, u64> abs_b;
965 BitField<28, 2, HalfType> type_b;
966 } reg;
967 union {
968 BitField<56, 1, u64> negate_b;
969 BitField<54, 1, u64> abs_b;
970 } cbuf;
971 union {
972 BitField<49, 4, PredCondition> cond;
973 BitField<53, 1, u64> h_and;
974 } cbuf_and_imm;
946 BitField<42, 1, u64> neg_pred; 975 BitField<42, 1, u64> neg_pred;
947 BitField<39, 3, u64> pred39; 976 BitField<39, 3, u64> pred39;
948 } hsetp2; 977 } hsetp2;
@@ -991,7 +1020,6 @@ union Instruction {
991 } iset; 1020 } iset;
992 1021
993 union { 1022 union {
994 BitField<41, 2, u64> selector; // i2i and i2f only
995 BitField<45, 1, u64> negate_a; 1023 BitField<45, 1, u64> negate_a;
996 BitField<49, 1, u64> abs_a; 1024 BitField<49, 1, u64> abs_a;
997 BitField<10, 2, Register::Size> src_size; 1025 BitField<10, 2, Register::Size> src_size;
@@ -1008,8 +1036,6 @@ union Instruction {
1008 } f2i; 1036 } f2i;
1009 1037
1010 union { 1038 union {
1011 BitField<8, 2, Register::Size> src_size;
1012 BitField<10, 2, Register::Size> dst_size;
1013 BitField<39, 4, u64> rounding; 1039 BitField<39, 4, u64> rounding;
1014 // H0, H1 extract for F16 missing 1040 // H0, H1 extract for F16 missing
1015 BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value 1041 BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value
@@ -1019,6 +1045,13 @@ union Instruction {
1019 } 1045 }
1020 } f2f; 1046 } f2f;
1021 1047
1048 union {
1049 BitField<41, 2, u64> selector;
1050 } int_src;
1051
1052 union {
1053 BitField<41, 1, u64> selector;
1054 } float_src;
1022 } conversion; 1055 } conversion;
1023 1056
1024 union { 1057 union {
@@ -1278,6 +1311,7 @@ union Instruction {
1278 union { 1311 union {
1279 BitField<49, 1, u64> nodep_flag; 1312 BitField<49, 1, u64> nodep_flag;
1280 BitField<53, 4, u64> texture_info; 1313 BitField<53, 4, u64> texture_info;
1314 BitField<59, 1, u64> fp32_flag;
1281 1315
1282 TextureType GetTextureType() const { 1316 TextureType GetTextureType() const {
1283 // The TLDS instruction has a weird encoding for the texture type. 1317 // The TLDS instruction has a weird encoding for the texture type.
@@ -1368,6 +1402,20 @@ union Instruction {
1368 } bra; 1402 } bra;
1369 1403
1370 union { 1404 union {
1405 BitField<20, 24, u64> target;
1406 BitField<5, 1, u64> constant_buffer;
1407
1408 s32 GetBranchExtend() const {
1409 // Sign extend the branch target offset
1410 u32 mask = 1U << (24 - 1);
1411 u32 value = static_cast<u32>(target);
1412 // The branch offset is relative to the next instruction and is stored in bytes, so
1413 // divide it by the size of an instruction and add 1 to it.
1414 return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1;
1415 }
1416 } brx;
1417
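GetBranchExtend sign-extends the 24-bit target field with the (value ^ mask) - mask idiom, then rescales bytes to instructions. A worked example, assuming the 8-byte instruction size implied by sizeof(Instruction):

    #include <cstdint>
    #include <cstdio>

    int main() {
        constexpr std::uint32_t mask = 1U << (24 - 1);
        const std::uint32_t raw = 0xFFFFF0; // 24-bit two's-complement encoding of -16
        // XOR flips the sign bit; subtracting the mask restores it with carry, yielding -16:
        const std::int32_t bytes = static_cast<std::int32_t>((raw ^ mask) - mask);
        const std::int32_t instructions = bytes / 8 + 1; // 8-byte opcodes, +1 next-instruction bias
        std::printf("%d bytes -> %d instructions\n", bytes, instructions);
    }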
1418 union {
1371 BitField<39, 1, u64> emit; // EmitVertex 1419 BitField<39, 1, u64> emit; // EmitVertex
1372 BitField<40, 1, u64> cut; // EndPrimitive 1420 BitField<40, 1, u64> cut; // EndPrimitive
1373 } out; 1421 } out;
@@ -1459,11 +1507,13 @@ public:
1459 SYNC, 1507 SYNC,
1460 BRK, 1508 BRK,
1461 DEPBAR, 1509 DEPBAR,
1510 VOTE,
1462 BFE_C, 1511 BFE_C,
1463 BFE_R, 1512 BFE_R,
1464 BFE_IMM, 1513 BFE_IMM,
1465 BFI_IMM_R, 1514 BFI_IMM_R,
1466 BRA, 1515 BRA,
1516 BRX,
1467 PBK, 1517 PBK,
1468 LD_A, 1518 LD_A,
1469 LD_L, 1519 LD_L,
@@ -1490,6 +1540,7 @@ public:
1490 TMML, // Texture Mip Map Level 1540 TMML, // Texture Mip Map Level
1491 SUST, // Surface Store 1541 SUST, // Surface Store
1492 EXIT, 1542 EXIT,
1543 NOP,
1493 IPA, 1544 IPA,
1494 OUT_R, // Emit vertex/primitive 1545 OUT_R, // Emit vertex/primitive
1495 ISBERD, 1546 ISBERD,
@@ -1532,7 +1583,9 @@ public:
1532 HFMA2_RC, 1583 HFMA2_RC,
1533 HFMA2_RR, 1584 HFMA2_RR,
1534 HFMA2_IMM_R, 1585 HFMA2_IMM_R,
1586 HSETP2_C,
1535 HSETP2_R, 1587 HSETP2_R,
1588 HSETP2_IMM,
1536 HSET2_R, 1589 HSET2_R,
1537 POPC_C, 1590 POPC_C,
1538 POPC_R, 1591 POPC_R,
@@ -1617,6 +1670,7 @@ public:
1617 Hfma2, 1670 Hfma2,
1618 Flow, 1671 Flow,
1619 Synch, 1672 Synch,
1673 Warp,
1620 Memory, 1674 Memory,
1621 Texture, 1675 Texture,
1622 Image, 1676 Image,
@@ -1738,10 +1792,12 @@ private:
1738 INST("111000101001----", Id::SSY, Type::Flow, "SSY"), 1792 INST("111000101001----", Id::SSY, Type::Flow, "SSY"),
1739 INST("111000101010----", Id::PBK, Type::Flow, "PBK"), 1793 INST("111000101010----", Id::PBK, Type::Flow, "PBK"),
1740 INST("111000100100----", Id::BRA, Type::Flow, "BRA"), 1794 INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
1795 INST("111000100101----", Id::BRX, Type::Flow, "BRX"),
1741 INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"), 1796 INST("1111000011111---", Id::SYNC, Type::Flow, "SYNC"),
1742 INST("111000110100---", Id::BRK, Type::Flow, "BRK"), 1797 INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
1743 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), 1798 INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
1744 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), 1799 INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
1800 INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
1745 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), 1801 INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
1746 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), 1802 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
1747 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), 1803 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
@@ -1760,12 +1816,13 @@ private:
1760 INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), 1816 INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
1761 INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), 1817 INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
1762 INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), 1818 INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
1763 INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), 1819 INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
1764 INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), 1820 INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
1765 INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), 1821 INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
1766 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), 1822 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
1767 INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), 1823 INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
1768 INST("11101011001-----", Id::SUST, Type::Image, "SUST"), 1824 INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
1825 INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"),
1769 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), 1826 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
1770 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), 1827 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
1771 INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), 1828 INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
@@ -1814,7 +1871,9 @@ private:
1814 INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), 1871 INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"),
1815 INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), 1872 INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
1816 INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), 1873 INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
1817 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"), 1874 INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
1875 INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
1876 INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
1818 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), 1877 INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
1819 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), 1878 INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
1820 INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), 1879 INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 52706505b..2c47541cb 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -17,26 +17,15 @@
17 17
18namespace Tegra { 18namespace Tegra {
19 19
20u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { 20GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
21 switch (format) { 21 : system{system}, renderer{renderer}, is_async{is_async} {
22 case PixelFormat::ABGR8:
23 case PixelFormat::BGRA8:
24 return 4;
25 default:
26 return 4;
27 }
28
29 UNREACHABLE();
30}
31
32GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
33 auto& rasterizer{renderer.Rasterizer()}; 22 auto& rasterizer{renderer.Rasterizer()};
34 memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer); 23 memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
35 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); 24 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
36 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); 25 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
37 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); 26 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);
38 kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); 27 kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
39 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); 28 maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
40 kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); 29 kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
41} 30}
42 31
@@ -50,6 +39,14 @@ const Engines::Maxwell3D& GPU::Maxwell3D() const {
50 return *maxwell_3d; 39 return *maxwell_3d;
51} 40}
52 41
42Engines::KeplerCompute& GPU::KeplerCompute() {
43 return *kepler_compute;
44}
45
46const Engines::KeplerCompute& GPU::KeplerCompute() const {
47 return *kepler_compute;
48}
49
53MemoryManager& GPU::MemoryManager() { 50MemoryManager& GPU::MemoryManager() {
54 return *memory_manager; 51 return *memory_manager;
55} 52}
@@ -66,6 +63,55 @@ const DmaPusher& GPU::DmaPusher() const {
66 return *dma_pusher; 63 return *dma_pusher;
67} 64}
68 65
66void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
67 syncpoints[syncpoint_id]++;
68 std::lock_guard lock{sync_mutex};
69 if (!syncpt_interrupts[syncpoint_id].empty()) {
70 u32 value = syncpoints[syncpoint_id].load();
71 auto it = syncpt_interrupts[syncpoint_id].begin();
72 while (it != syncpt_interrupts[syncpoint_id].end()) {
73 if (value >= *it) {
74 TriggerCpuInterrupt(syncpoint_id, *it);
75 it = syncpt_interrupts[syncpoint_id].erase(it);
76 continue;
77 }
78 it++;
79 }
80 }
81}
82
83u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const {
84 return syncpoints[syncpoint_id].load();
85}
86
87void GPU::RegisterSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
88 auto& interrupt = syncpt_interrupts[syncpoint_id];
89 bool contains = std::any_of(interrupt.begin(), interrupt.end(),
90 [value](u32 in_value) { return in_value == value; });
91 if (contains) {
92 return;
93 }
94 syncpt_interrupts[syncpoint_id].emplace_back(value);
95}
96
97bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
98 std::lock_guard lock{sync_mutex};
99 auto& interrupt = syncpt_interrupts[syncpoint_id];
100 const auto iter =
101 std::find_if(interrupt.begin(), interrupt.end(),
102 [value](u32 interrupt_value) { return value == interrupt_value; });
103
104 if (iter == interrupt.end()) {
105 return false;
106 }
107 interrupt.erase(iter);
108 return true;
109}
110
111void GPU::FlushCommands() {
112 renderer.Rasterizer().FlushCommands();
113}
114
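IncrementSyncPoint above is the GPU half of the new fence plumbing: nvhost registers a threshold via RegisterSyncptInterrupt, and each increment fires (and retires) every threshold the counter has now reached. A compressed single-syncpoint model of that flow, with printf standing in for TriggerCpuInterrupt:

    #include <atomic>
    #include <cstdint>
    #include <cstdio>
    #include <list>
    #include <mutex>

    std::atomic<std::uint32_t> syncpoint{0};
    std::list<std::uint32_t> thresholds; // values registered by pending waits
    std::mutex sync_mutex;

    void IncrementSyncPoint() {
        const std::uint32_t value = ++syncpoint;
        std::lock_guard lock{sync_mutex};
        for (auto it = thresholds.begin(); it != thresholds.end();) {
            if (value >= *it) {
                std::printf("trigger CPU interrupt for threshold %u\n", *it);
                it = thresholds.erase(it); // each threshold fires once
            } else {
                ++it;
            }
        }
    }

    int main() {
        {
            std::lock_guard lock{sync_mutex};
            thresholds.push_back(2);
        }
        IncrementSyncPoint(); // value 1: below threshold, nothing fires
        IncrementSyncPoint(); // value 2: fires and retires the registered threshold
    }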
69u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { 115u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
70 ASSERT(format != RenderTargetFormat::NONE); 116 ASSERT(format != RenderTargetFormat::NONE);
71 117
@@ -143,12 +189,12 @@ enum class BufferMethods {
143 NotifyIntr = 0x8, 189 NotifyIntr = 0x8,
144 WrcacheFlush = 0x9, 190 WrcacheFlush = 0x9,
145 Unk28 = 0xA, 191 Unk28 = 0xA,
146 Unk2c = 0xB, 192 UnkCacheFlush = 0xB,
147 RefCnt = 0x14, 193 RefCnt = 0x14,
148 SemaphoreAcquire = 0x1A, 194 SemaphoreAcquire = 0x1A,
149 SemaphoreRelease = 0x1B, 195 SemaphoreRelease = 0x1B,
150 Unk70 = 0x1C, 196 FenceValue = 0x1C,
151 Unk74 = 0x1D, 197 FenceAction = 0x1D,
152 Unk78 = 0x1E, 198 Unk78 = 0x1E,
153 Unk7c = 0x1F, 199 Unk7c = 0x1F,
154 Yield = 0x20, 200 Yield = 0x20,
@@ -194,6 +240,10 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
194 case BufferMethods::SemaphoreAddressLow: 240 case BufferMethods::SemaphoreAddressLow:
195 case BufferMethods::SemaphoreSequence: 241 case BufferMethods::SemaphoreSequence:
196 case BufferMethods::RefCnt: 242 case BufferMethods::RefCnt:
243 case BufferMethods::UnkCacheFlush:
244 case BufferMethods::WrcacheFlush:
245 case BufferMethods::FenceValue:
246 case BufferMethods::FenceAction:
197 break; 247 break;
198 case BufferMethods::SemaphoreTrigger: { 248 case BufferMethods::SemaphoreTrigger: {
199 ProcessSemaphoreTriggerMethod(); 249 ProcessSemaphoreTriggerMethod();
@@ -204,21 +254,11 @@ void GPU::CallPullerMethod(const MethodCall& method_call) {
204 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); 254 LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
205 break; 255 break;
206 } 256 }
207 case BufferMethods::WrcacheFlush: {
208 // TODO(Kmather73): Research and implement this method.
209 LOG_ERROR(HW_GPU, "Special puller engine method WrcacheFlush not implemented");
210 break;
211 }
212 case BufferMethods::Unk28: { 257 case BufferMethods::Unk28: {
213 // TODO(Kmather73): Research and implement this method. 258 // TODO(Kmather73): Research and implement this method.
214 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); 259 LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
215 break; 260 break;
216 } 261 }
217 case BufferMethods::Unk2c: {
218 // TODO(Kmather73): Research and implement this method.
219 LOG_ERROR(HW_GPU, "Special puller engine method Unk2c not implemented");
220 break;
221 }
222 case BufferMethods::SemaphoreAcquire: { 262 case BufferMethods::SemaphoreAcquire: {
223 ProcessSemaphoreAcquire(); 263 ProcessSemaphoreAcquire();
224 break; 264 break;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index fe6628923..78bc0601a 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -5,8 +5,12 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <atomic>
9#include <list>
8#include <memory> 10#include <memory>
11#include <mutex>
9#include "common/common_types.h" 12#include "common/common_types.h"
13#include "core/hle/service/nvdrv/nvdata.h"
10#include "core/hle/service/nvflinger/buffer_queue.h" 14#include "core/hle/service/nvflinger/buffer_queue.h"
11#include "video_core/dma_pusher.h" 15#include "video_core/dma_pusher.h"
12 16
@@ -15,6 +19,10 @@ inline CacheAddr ToCacheAddr(const void* host_ptr) {
15 return reinterpret_cast<CacheAddr>(host_ptr); 19 return reinterpret_cast<CacheAddr>(host_ptr);
16} 20}
17 21
22inline u8* FromCacheAddr(CacheAddr cache_addr) {
23 return reinterpret_cast<u8*>(cache_addr);
24}
25
18namespace Core { 26namespace Core {
19class System; 27class System;
20} 28}
@@ -87,14 +95,10 @@ class DebugContext;
87struct FramebufferConfig { 95struct FramebufferConfig {
88 enum class PixelFormat : u32 { 96 enum class PixelFormat : u32 {
89 ABGR8 = 1, 97 ABGR8 = 1,
98 RGB565 = 4,
90 BGRA8 = 5, 99 BGRA8 = 5,
91 }; 100 };
92 101
93 /**
94 * Returns the number of bytes per pixel.
95 */
96 static u32 BytesPerPixel(PixelFormat format);
97
98 VAddr address; 102 VAddr address;
99 u32 offset; 103 u32 offset;
100 u32 width; 104 u32 width;
@@ -127,7 +131,7 @@ class MemoryManager;
127 131
128class GPU { 132class GPU {
129public: 133public:
130 explicit GPU(Core::System& system, VideoCore::RendererBase& renderer); 134 explicit GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async);
131 135
132 virtual ~GPU(); 136 virtual ~GPU();
133 137
@@ -149,12 +153,20 @@ public:
149 /// Calls a GPU method. 153 /// Calls a GPU method.
150 void CallMethod(const MethodCall& method_call); 154 void CallMethod(const MethodCall& method_call);
151 155
156 void FlushCommands();
157
152 /// Returns a reference to the Maxwell3D GPU engine. 158 /// Returns a reference to the Maxwell3D GPU engine.
153 Engines::Maxwell3D& Maxwell3D(); 159 Engines::Maxwell3D& Maxwell3D();
154 160
155 /// Returns a const reference to the Maxwell3D GPU engine. 161 /// Returns a const reference to the Maxwell3D GPU engine.
156 const Engines::Maxwell3D& Maxwell3D() const; 162 const Engines::Maxwell3D& Maxwell3D() const;
157 163
164 /// Returns a reference to the KeplerCompute GPU engine.
165 Engines::KeplerCompute& KeplerCompute();
166
167 /// Returns a reference to the KeplerCompute GPU engine.
168 const Engines::KeplerCompute& KeplerCompute() const;
169
158 /// Returns a reference to the GPU memory manager. 170 /// Returns a reference to the GPU memory manager.
159 Tegra::MemoryManager& MemoryManager(); 171 Tegra::MemoryManager& MemoryManager();
160 172
@@ -164,6 +176,22 @@ public:
164 /// Returns a reference to the GPU DMA pusher. 176 /// Returns a reference to the GPU DMA pusher.
165 Tegra::DmaPusher& DmaPusher(); 177 Tegra::DmaPusher& DmaPusher();
166 178
179 void IncrementSyncPoint(u32 syncpoint_id);
180
181 u32 GetSyncpointValue(u32 syncpoint_id) const;
182
183 void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value);
184
185 bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
186
187 std::unique_lock<std::mutex> LockSync() {
188 return std::unique_lock{sync_mutex};
189 }
190
191 bool IsAsync() const {
192 return is_async;
193 }
194
167 /// Returns a const reference to the GPU DMA pusher. 195 /// Returns a const reference to the GPU DMA pusher.
168 const Tegra::DmaPusher& DmaPusher() const; 196 const Tegra::DmaPusher& DmaPusher() const;
169 197
@@ -194,7 +222,12 @@ public:
194 222
195 u32 semaphore_acquire; 223 u32 semaphore_acquire;
196 u32 semaphore_release; 224 u32 semaphore_release;
197 INSERT_PADDING_WORDS(0xE4); 225 u32 fence_value;
226 union {
227 BitField<4, 4, u32> operation;
228 BitField<8, 8, u32> id;
229 } fence_action;
230 INSERT_PADDING_WORDS(0xE2);
198 231
199 // Puller state 232 // Puller state
200 u32 acquire_mode; 233 u32 acquire_mode;
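fence_value and fence_action give names to the former Unk70/Unk74 puller methods; the BitField layout packs an operation in bits 4-7 and a syncpoint id in bits 8-15. Decoding by hand, with plain shifts standing in for BitField:

    #include <cstdint>
    #include <cstdio>

    int main() {
        // operation in bits 4..7, id in bits 8..15, per the declaration above
        const std::uint32_t raw = (5u << 8) | (1u << 4);
        const std::uint32_t operation = (raw >> 4) & 0xF;
        const std::uint32_t id = (raw >> 8) & 0xFF;
        std::printf("fence action: operation=%u syncpoint=%u\n", operation, id);
    }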
@@ -216,8 +249,7 @@ public:
216 virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; 249 virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
217 250
218 /// Swap buffers (render frame) 251 /// Swap buffers (render frame)
219 virtual void SwapBuffers( 252 virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
220 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
221 253
222 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 254 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
223 virtual void FlushRegion(CacheAddr addr, u64 size) = 0; 255 virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
@@ -228,6 +260,9 @@ public:
228 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 260 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
229 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; 261 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
230 262
263protected:
264 virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
265
231private: 266private:
232 void ProcessBindMethod(const MethodCall& method_call); 267 void ProcessBindMethod(const MethodCall& method_call);
233 void ProcessSemaphoreTriggerMethod(); 268 void ProcessSemaphoreTriggerMethod();
@@ -245,6 +280,7 @@ private:
245 280
246protected: 281protected:
247 std::unique_ptr<Tegra::DmaPusher> dma_pusher; 282 std::unique_ptr<Tegra::DmaPusher> dma_pusher;
283 Core::System& system;
248 VideoCore::RendererBase& renderer; 284 VideoCore::RendererBase& renderer;
249 285
250private: 286private:
@@ -262,6 +298,14 @@ private:
262 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; 298 std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
263 /// Inline memory engine 299 /// Inline memory engine
264 std::unique_ptr<Engines::KeplerMemory> kepler_memory; 300 std::unique_ptr<Engines::KeplerMemory> kepler_memory;
301
302 std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
303
304 std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
305
306 std::mutex sync_mutex;
307
308 const bool is_async;
265}; 309};
266 310
267#define ASSERT_REG_POSITION(field_name, position) \ 311#define ASSERT_REG_POSITION(field_name, position) \
@@ -274,6 +318,8 @@ ASSERT_REG_POSITION(semaphore_trigger, 0x7);
274ASSERT_REG_POSITION(reference_count, 0x14); 318ASSERT_REG_POSITION(reference_count, 0x14);
275ASSERT_REG_POSITION(semaphore_acquire, 0x1A); 319ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
276ASSERT_REG_POSITION(semaphore_release, 0x1B); 320ASSERT_REG_POSITION(semaphore_release, 0x1B);
321ASSERT_REG_POSITION(fence_value, 0x1C);
322ASSERT_REG_POSITION(fence_action, 0x1D);
277 323
278ASSERT_REG_POSITION(acquire_mode, 0x100); 324ASSERT_REG_POSITION(acquire_mode, 0x100);
279ASSERT_REG_POSITION(acquire_source, 0x101); 325ASSERT_REG_POSITION(acquire_source, 0x101);
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index d4e2553a9..f2a3a390e 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -2,6 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/core.h"
6#include "core/hardware_interrupt_manager.h"
5#include "video_core/gpu_asynch.h" 7#include "video_core/gpu_asynch.h"
6#include "video_core/gpu_thread.h" 8#include "video_core/gpu_thread.h"
7#include "video_core/renderer_base.h" 9#include "video_core/renderer_base.h"
@@ -9,7 +11,7 @@
9namespace VideoCommon { 11namespace VideoCommon {
10 12
11GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) 13GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
12 : GPU(system, renderer), gpu_thread{system} {} 14 : GPU(system, renderer, true), gpu_thread{system} {}
13 15
14GPUAsynch::~GPUAsynch() = default; 16GPUAsynch::~GPUAsynch() = default;
15 17
@@ -21,9 +23,8 @@ void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
21 gpu_thread.SubmitList(std::move(entries)); 23 gpu_thread.SubmitList(std::move(entries));
22} 24}
23 25
24void GPUAsynch::SwapBuffers( 26void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
25 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { 27 gpu_thread.SwapBuffers(framebuffer);
26 gpu_thread.SwapBuffers(std::move(framebuffer));
27} 28}
28 29
29void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) { 30void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
@@ -38,4 +39,9 @@ void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
38 gpu_thread.FlushAndInvalidateRegion(addr, size); 39 gpu_thread.FlushAndInvalidateRegion(addr, size);
39} 40}
40 41
42void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
43 auto& interrupt_manager = system.InterruptManager();
44 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
45}
46
41} // namespace VideoCommon 47} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 30be74cba..a12f9bac4 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -14,19 +14,21 @@ class RendererBase;
14namespace VideoCommon { 14namespace VideoCommon {
15 15
16/// Implementation of GPU interface that runs the GPU asynchronously 16/// Implementation of GPU interface that runs the GPU asynchronously
17class GPUAsynch : public Tegra::GPU { 17class GPUAsynch final : public Tegra::GPU {
18public: 18public:
19 explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); 19 explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
20 ~GPUAsynch() override; 20 ~GPUAsynch() override;
21 21
22 void Start() override; 22 void Start() override;
23 void PushGPUEntries(Tegra::CommandList&& entries) override; 23 void PushGPUEntries(Tegra::CommandList&& entries) override;
24 void SwapBuffers( 24 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
25 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
26 void FlushRegion(CacheAddr addr, u64 size) override; 25 void FlushRegion(CacheAddr addr, u64 size) override;
27 void InvalidateRegion(CacheAddr addr, u64 size) override; 26 void InvalidateRegion(CacheAddr addr, u64 size) override;
28 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 27 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
29 28
29protected:
30 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
31
30private: 32private:
31 GPUThread::ThreadManager gpu_thread; 33 GPUThread::ThreadManager gpu_thread;
32}; 34};
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index 45e43b1dc..d48221077 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -8,7 +8,7 @@
8namespace VideoCommon { 8namespace VideoCommon {
9 9
10GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) 10GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
11 : GPU(system, renderer) {} 11 : GPU(system, renderer, false) {}
12 12
13GPUSynch::~GPUSynch() = default; 13GPUSynch::~GPUSynch() = default;
14 14
@@ -19,9 +19,8 @@ void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
19 dma_pusher->DispatchCalls(); 19 dma_pusher->DispatchCalls();
20} 20}
21 21
22void GPUSynch::SwapBuffers( 22void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
23 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { 23 renderer.SwapBuffers(framebuffer);
24 renderer.SwapBuffers(std::move(framebuffer));
25} 24}
26 25
27void GPUSynch::FlushRegion(CacheAddr addr, u64 size) { 26void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 3031fcf72..5eb1c461c 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -13,18 +13,21 @@ class RendererBase;
13namespace VideoCommon { 13namespace VideoCommon {
14 14
15/// Implementation of GPU interface that runs the GPU synchronously 15/// Implementation of GPU interface that runs the GPU synchronously
16class GPUSynch : public Tegra::GPU { 16class GPUSynch final : public Tegra::GPU {
17public: 17public:
18 explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); 18 explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
19 ~GPUSynch() override; 19 ~GPUSynch() override;
20 20
21 void Start() override; 21 void Start() override;
22 void PushGPUEntries(Tegra::CommandList&& entries) override; 22 void PushGPUEntries(Tegra::CommandList&& entries) override;
23 void SwapBuffers( 23 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
24 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
25 void FlushRegion(CacheAddr addr, u64 size) override; 24 void FlushRegion(CacheAddr addr, u64 size) override;
26 void InvalidateRegion(CacheAddr addr, u64 size) override; 25 void InvalidateRegion(CacheAddr addr, u64 size) override;
27 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 26 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
27
28protected:
29 void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
30 [[maybe_unused]] u32 value) const override {}
28}; 31};
29 32
30} // namespace VideoCommon 33} // namespace VideoCommon
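Both backends fill in the same protected hook: GPUAsynch forwards TriggerCpuInterrupt to the hardware interrupt manager because the GPU thread runs apart from the CPU, while GPUSynch stubs it out since everything already executes inline on the CPU thread. A reduced sketch of the pattern, with simplified names standing in for the yuzu classes:

#include <cstdint>
#include <cstdio>

class GpuBase {
public:
    virtual ~GpuBase() = default;

    // Called when a syncpoint increments; the backend decides how to deliver it.
    void IncrementSyncPoint(std::uint32_t syncpoint_id, std::uint32_t value) {
        TriggerCpuInterrupt(syncpoint_id, value);
    }

protected:
    virtual void TriggerCpuInterrupt(std::uint32_t syncpoint_id, std::uint32_t value) const = 0;
};

class GpuAsync final : public GpuBase {
protected:
    void TriggerCpuInterrupt(std::uint32_t syncpoint_id, std::uint32_t value) const override {
        // Stand-in for system.InterruptManager().GPUInterruptSyncpt(...).
        std::printf("interrupt: syncpt=%u value=%u\n", static_cast<unsigned>(syncpoint_id),
                    static_cast<unsigned>(value));
    }
};

class GpuSync final : public GpuBase {
protected:
    // The synchronous path already ran on the CPU thread; nothing to signal.
    void TriggerCpuInterrupt(std::uint32_t, std::uint32_t) const override {}
};

int main() {
    GpuAsync{}.IncrementSyncPoint(5, 1);
    GpuSync{}.IncrementSyncPoint(5, 1);
    return 0;
}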
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 3f0939ec9..5f039e4fd 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -21,7 +21,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
21 MicroProfileOnThreadCreate("GpuThread"); 21 MicroProfileOnThreadCreate("GpuThread");
22 22
23 // Wait for first GPU command before acquiring the window context 23 // Wait for first GPU command before acquiring the window context
24 state.WaitForCommands(); 24 while (state.queue.Empty())
25 ;
25 26
26 // If emulation was stopped during disk shader loading, abort before trying to acquire context 27 // If emulation was stopped during disk shader loading, abort before trying to acquire context
27 if (!state.is_running) { 28 if (!state.is_running) {
@@ -32,14 +33,13 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
32 33
33 CommandDataContainer next; 34 CommandDataContainer next;
34 while (state.is_running) { 35 while (state.is_running) {
35 state.WaitForCommands();
36 while (!state.queue.Empty()) { 36 while (!state.queue.Empty()) {
37 state.queue.Pop(next); 37 state.queue.Pop(next);
38 if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { 38 if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
39 dma_pusher.Push(std::move(submit_list->entries)); 39 dma_pusher.Push(std::move(submit_list->entries));
40 dma_pusher.DispatchCalls(); 40 dma_pusher.DispatchCalls();
41 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { 41 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
42 renderer.SwapBuffers(std::move(data->framebuffer)); 42 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
43 } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { 43 } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
44 renderer.Rasterizer().FlushRegion(data->addr, data->size); 44 renderer.Rasterizer().FlushRegion(data->addr, data->size);
45 } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { 45 } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
@@ -49,8 +49,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
49 } else { 49 } else {
50 UNREACHABLE(); 50 UNREACHABLE();
51 } 51 }
52 state.signaled_fence = next.fence; 52 state.signaled_fence.store(next.fence);
53 state.TrySynchronize();
54 } 53 }
55 } 54 }
56} 55}
@@ -79,9 +78,9 @@ void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
79 system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); 78 system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
80} 79}
81 80
82void ThreadManager::SwapBuffers( 81void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
83 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { 82 PushCommand(SwapBuffersCommand(framebuffer ? *framebuffer
84 PushCommand(SwapBuffersCommand(std::move(framebuffer))); 83 : std::optional<const Tegra::FramebufferConfig>{}));
85} 84}
86 85
87void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { 86void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
@@ -89,12 +88,7 @@ void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
89} 88}
90 89
91void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { 90void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
92 if (state.queue.Empty()) { 91 system.Renderer().Rasterizer().InvalidateRegion(addr, size);
93 // It's quicker to invalidate a single region on the CPU if the queue is already empty
94 system.Renderer().Rasterizer().InvalidateRegion(addr, size);
95 } else {
96 PushCommand(InvalidateRegionCommand(addr, size));
97 }
98} 92}
99 93
100void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { 94void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
@@ -105,22 +99,13 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
105u64 ThreadManager::PushCommand(CommandData&& command_data) { 99u64 ThreadManager::PushCommand(CommandData&& command_data) {
106 const u64 fence{++state.last_fence}; 100 const u64 fence{++state.last_fence};
107 state.queue.Push(CommandDataContainer(std::move(command_data), fence)); 101 state.queue.Push(CommandDataContainer(std::move(command_data), fence));
108 state.SignalCommands();
109 return fence; 102 return fence;
110} 103}
111 104
112MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); 105MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
113void SynchState::WaitForSynchronization(u64 fence) { 106void SynchState::WaitForSynchronization(u64 fence) {
114 if (signaled_fence >= fence) { 107 while (signaled_fence.load() < fence)
115 return; 108 ;
116 }
117
118 // Wait for the GPU to be idle (all commands to be executed)
119 {
120 MICROPROFILE_SCOPE(GPU_wait);
121 std::unique_lock lock{synchronization_mutex};
122 synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; });
123 }
124} 109}
125 110
126} // namespace VideoCommon::GPUThread 111} // namespace VideoCommon::GPUThread
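The rewritten gpu_thread.cpp drops the mutex/condition-variable handshake in favor of two monotonic fences: the submitting thread bumps last_fence for every pushed command, the GPU thread stores the fence of the last executed command into the atomic signaled_fence, and WaitForSynchronization simply spins until the published value catches up. A minimal single-producer/single-consumer sketch of that scheme:

#include <atomic>
#include <cstdint>
#include <thread>

std::atomic<std::uint64_t> signaled_fence{0};

void WaitForSynchronization(std::uint64_t fence) {
    // Busy-wait, as in the code above; acceptable because the CPU/GPU gap stays short.
    while (signaled_fence.load() < fence)
        ;
}

int main() {
    std::uint64_t last_fence = 0;

    // Producer: pushing a command hands back a fresh fence.
    const std::uint64_t fence = ++last_fence;

    // Consumer: the GPU thread publishes the fence after executing the command.
    std::thread gpu_thread([fence] { signaled_fence.store(fence); });

    WaitForSynchronization(fence); // spins until the store above lands
    gpu_thread.join();
    return 0;
}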
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 05a168a72..3ae0ec9f3 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -88,41 +88,9 @@ struct CommandDataContainer {
88/// Struct used to synchronize the GPU thread 88/// Struct used to synchronize the GPU thread
89struct SynchState final { 89struct SynchState final {
90 std::atomic_bool is_running{true}; 90 std::atomic_bool is_running{true};
91 std::atomic_int queued_frame_count{};
92 std::mutex synchronization_mutex;
93 std::mutex commands_mutex;
94 std::condition_variable commands_condition;
95 std::condition_variable synchronization_condition;
96
97 /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU
98 /// synchronized. This is entirely empirical.
99 bool IsSynchronized() const {
100 constexpr std::size_t max_queue_gap{5};
101 return queue.Size() <= max_queue_gap;
102 }
103
104 void TrySynchronize() {
105 if (IsSynchronized()) {
106 std::lock_guard lock{synchronization_mutex};
107 synchronization_condition.notify_one();
108 }
109 }
110 91
111 void WaitForSynchronization(u64 fence); 92 void WaitForSynchronization(u64 fence);
112 93
113 void SignalCommands() {
114 if (queue.Empty()) {
115 return;
116 }
117
118 commands_condition.notify_one();
119 }
120
121 void WaitForCommands() {
122 std::unique_lock lock{commands_mutex};
123 commands_condition.wait(lock, [this] { return !queue.Empty(); });
124 }
125
126 using CommandQueue = Common::SPSCQueue<CommandDataContainer>; 94 using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
127 CommandQueue queue; 95 CommandQueue queue;
128 u64 last_fence{}; 96 u64 last_fence{};
@@ -142,8 +110,7 @@ public:
142 void SubmitList(Tegra::CommandList&& entries); 110 void SubmitList(Tegra::CommandList&& entries);
143 111
144 /// Swap buffers (render frame) 112 /// Swap buffers (render frame)
145 void SwapBuffers( 113 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
146 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
147 114
148 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 115 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
149 void FlushRegion(CacheAddr addr, u64 size); 116 void FlushRegion(CacheAddr addr, u64 size);
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index c766ed692..9f59a2dc1 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -4,14 +4,18 @@
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "common/microprofile.h"
7#include "video_core/engines/maxwell_3d.h" 8#include "video_core/engines/maxwell_3d.h"
8#include "video_core/macro_interpreter.h" 9#include "video_core/macro_interpreter.h"
9 10
11MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
12
10namespace Tegra { 13namespace Tegra {
11 14
12MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} 15MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
13 16
14void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { 17void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
18 MICROPROFILE_SCOPE(MacroInterp);
15 Reset(); 19 Reset();
16 registers[1] = parameters[0]; 20 registers[1] = parameters[0];
17 this->parameters = std::move(parameters); 21 this->parameters = std::move(parameters);
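MICROPROFILE_SCOPE is a scoped sampler, so the single macro at the top of Execute() attributes each complete macro run to the MacroInterp bucket. A rough RAII equivalent that illustrates the mechanism (not the actual microprofile internals):

#include <chrono>
#include <cstdio>

// Stand-in for MICROPROFILE_SCOPE: start a clock on construction, report on scope exit.
struct ScopeTimer {
    const char* name;
    std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();

    ~ScopeTimer() {
        const auto elapsed = std::chrono::steady_clock::now() - start;
        const auto us = std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count();
        std::printf("%s: %lld us\n", name, static_cast<long long>(us));
    }
};

void Execute() {
    ScopeTimer timer{"MacroInterp"}; // everything below is attributed to this scope
    // ... interpret the macro ...
}

int main() {
    Execute();
    return 0;
}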
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 322453116..bffae940c 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -5,13 +5,17 @@
5#include "common/alignment.h" 5#include "common/alignment.h"
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/core.h"
9#include "core/hle/kernel/process.h"
10#include "core/hle/kernel/vm_manager.h"
8#include "core/memory.h" 11#include "core/memory.h"
9#include "video_core/memory_manager.h" 12#include "video_core/memory_manager.h"
10#include "video_core/rasterizer_interface.h" 13#include "video_core/rasterizer_interface.h"
11 14
12namespace Tegra { 15namespace Tegra {
13 16
14MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} { 17MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
18 : rasterizer{rasterizer}, system{system} {
15 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); 19 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
16 std::fill(page_table.attributes.begin(), page_table.attributes.end(), 20 std::fill(page_table.attributes.begin(), page_table.attributes.end(),
17 Common::PageType::Unmapped); 21 Common::PageType::Unmapped);
@@ -49,6 +53,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
49 const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; 53 const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
50 54
51 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); 55 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
56 ASSERT(system.CurrentProcess()
57 ->VMManager()
58 .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped,
59 Kernel::MemoryAttribute::DeviceMapped)
60 .IsSuccess());
52 61
53 return gpu_addr; 62 return gpu_addr;
54} 63}
@@ -59,7 +68,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size)
59 const u64 aligned_size{Common::AlignUp(size, page_size)}; 68 const u64 aligned_size{Common::AlignUp(size, page_size)};
60 69
61 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); 70 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
62 71 ASSERT(system.CurrentProcess()
72 ->VMManager()
73 .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped,
74 Kernel::MemoryAttribute::DeviceMapped)
75 .IsSuccess());
63 return gpu_addr; 76 return gpu_addr;
64} 77}
65 78
@@ -68,9 +81,16 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
68 81
69 const u64 aligned_size{Common::AlignUp(size, page_size)}; 82 const u64 aligned_size{Common::AlignUp(size, page_size)};
70 const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; 83 const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
84 const auto cpu_addr = GpuToCpuAddress(gpu_addr);
85 ASSERT(cpu_addr);
71 86
72 rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); 87 rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
73 UnmapRange(gpu_addr, aligned_size); 88 UnmapRange(gpu_addr, aligned_size);
89 ASSERT(system.CurrentProcess()
90 ->VMManager()
91 .SetMemoryAttribute(cpu_addr.value(), size, Kernel::MemoryAttribute::DeviceMapped,
92 Kernel::MemoryAttribute::None)
93 .IsSuccess());
74 94
75 return gpu_addr; 95 return gpu_addr;
76} 96}
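The new asserts bracket a GPU mapping's lifetime in the guest VM manager: mapping sets the DeviceMapped attribute on the CPU range and unmapping clears it, using a (mask, value) pair so only that one bit is rewritten. A sketch of the assumed update semantics — the bit position here is made up, only the masking behavior matters:

#include <cstdint>
#include <cstdio>

// Hypothetical bit position; the real MemoryAttribute values live in the kernel headers.
constexpr std::uint32_t DeviceMapped = 1u << 2;

// SetMemoryAttribute(addr, size, mask, value): rewrite only the bits selected by mask.
std::uint32_t ApplyAttribute(std::uint32_t attribute, std::uint32_t mask, std::uint32_t value) {
    return (attribute & ~mask) | (value & mask);
}

int main() {
    std::uint32_t attribute = 0;
    attribute = ApplyAttribute(attribute, DeviceMapped, DeviceMapped); // MapBufferEx path
    std::printf("after map:   0x%X\n", static_cast<unsigned>(attribute));
    attribute = ApplyAttribute(attribute, DeviceMapped, 0);            // UnmapBuffer path
    std::printf("after unmap: 0x%X\n", static_cast<unsigned>(attribute));
    return 0;
}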
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 43a84bd52..aea010087 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -14,6 +14,10 @@ namespace VideoCore {
14class RasterizerInterface; 14class RasterizerInterface;
15} 15}
16 16
17namespace Core {
18class System;
19}
20
17namespace Tegra { 21namespace Tegra {
18 22
19/** 23/**
@@ -47,7 +51,7 @@ struct VirtualMemoryArea {
47 51
48class MemoryManager final { 52class MemoryManager final {
49public: 53public:
50 explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer); 54 explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
51 ~MemoryManager(); 55 ~MemoryManager();
52 56
53 GPUVAddr AllocateSpace(u64 size, u64 align); 57 GPUVAddr AllocateSpace(u64 size, u64 align);
@@ -173,6 +177,8 @@ private:
173 Common::PageTable page_table{page_bits}; 177 Common::PageTable page_table{page_bits};
174 VMAMap vma_map; 178 VMAMap vma_map;
175 VideoCore::RasterizerInterface& rasterizer; 179 VideoCore::RasterizerInterface& rasterizer;
180
181 Core::System& system;
176}; 182};
177 183
178} // namespace Tegra 184} // namespace Tegra
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 3e91cbc83..084f85e67 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -25,8 +25,8 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
25 25
26 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual 26 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
27 // pixel values. 27 // pixel values.
28 const u32 tile_size_x{GetDefaultBlockWidth(format)}; 28 constexpr u32 tile_size_x{GetDefaultBlockWidth(format)};
29 const u32 tile_size_y{GetDefaultBlockHeight(format)}; 29 constexpr u32 tile_size_y{GetDefaultBlockHeight(format)};
30 30
31 if constexpr (morton_to_linear) { 31 if constexpr (morton_to_linear) {
32 Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel, 32 Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
@@ -186,99 +186,6 @@ static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFor
186 return morton_to_linear_fns[static_cast<std::size_t>(format)]; 186 return morton_to_linear_fns[static_cast<std::size_t>(format)];
187} 187}
188 188
189static u32 MortonInterleave128(u32 x, u32 y) {
190 // 128x128 Z-Order coordinate from 2D coordinates
191 static constexpr u32 xlut[] = {
192 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042,
193 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809,
194 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000,
195 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043,
196 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a,
197 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001,
198 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048,
199 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b,
200 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002,
201 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049,
202 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840,
203 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003,
204 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a,
205 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841,
206 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008,
207 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b,
208 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842,
209 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009,
210 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800,
211 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843,
212 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a,
213 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801,
214 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848,
215 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b,
216 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802,
217 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849,
218 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040,
219 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803,
220 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a,
221 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041,
222 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808,
223 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b,
224 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042,
225 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809,
226 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b,
227 };
228 static constexpr u32 ylut[] = {
229 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090,
230 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124,
231 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200,
232 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294,
233 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330,
234 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404,
235 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0,
236 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534,
237 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610,
238 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4,
239 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780,
240 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014,
241 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0,
242 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184,
243 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220,
244 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4,
245 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390,
246 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424,
247 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500,
248 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594,
249 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630,
250 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704,
251 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0,
252 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034,
253 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110,
254 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4,
255 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280,
256 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314,
257 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0,
258 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484,
259 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520,
260 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4,
261 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690,
262 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724,
263 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4,
264 };
265 return xlut[x % 128] + ylut[y % 128];
266}
267
268static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
269 // Calculates the offset of the position of the pixel in Morton order
270 // Framebuffer images are split into 128x128 tiles.
271
272 constexpr u32 block_height = 128;
273 const u32 coarse_x = x & ~127;
274
275 const u32 i = MortonInterleave128(x, y);
276
277 const u32 offset = coarse_x * block_height;
278
279 return (i + offset) * bytes_per_pixel;
280}
281
282void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, 189void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
283 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, 190 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
284 u8* buffer, u8* addr) { 191 u8* buffer, u8* addr) {
@@ -286,23 +193,4 @@ void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stri
286 tile_width_spacing, buffer, addr); 193 tile_width_spacing, buffer, addr);
287} 194}
288 195
289void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
290 u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data) {
291 const bool morton_to_linear = mode == MortonSwizzleMode::MortonToLinear;
292 u8* data_ptrs[2];
293 for (u32 y = 0; y < height; ++y) {
294 for (u32 x = 0; x < width; ++x) {
295 const u32 coarse_y = y & ~127;
296 const u32 morton_offset =
297 GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
298 const u32 linear_pixel_index = (x + y * width) * linear_bytes_per_pixel;
299
300 data_ptrs[morton_to_linear ? 1 : 0] = morton_data + morton_offset;
301 data_ptrs[morton_to_linear ? 0 : 1] = &linear_data[linear_pixel_index];
302
303 std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
304 }
305 }
306}
307
308} // namespace VideoCore 196} // namespace VideoCore
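The deletion above removes MortonInterleave128 together with its only users here, GetMortonOffset128 and MortonCopyPixels128 (the declaration also disappears from morton.h below). For reference, the mapping the tables encoded is ordinary 2D Z-order indexing; a generic version computed by interleaving coordinate bits, instead of the 128x128-tile lookup tables the deleted function baked in:

#include <cstdint>
#include <cstdio>

// Spread the low 16 bits of v so they occupy the even bit positions.
std::uint32_t Part1By1(std::uint32_t v) {
    v &= 0x0000FFFF;
    v = (v | (v << 8)) & 0x00FF00FF;
    v = (v | (v << 4)) & 0x0F0F0F0F;
    v = (v | (v << 2)) & 0x33333333;
    v = (v | (v << 1)) & 0x55555555;
    return v;
}

// Z-order index: x bits on even positions, y bits on odd positions.
std::uint32_t MortonInterleave(std::uint32_t x, std::uint32_t y) {
    return Part1By1(x) | (Part1By1(y) << 1);
}

int main() {
    std::printf("morton(3, 5) = %u\n", static_cast<unsigned>(MortonInterleave(3, 5))); // 39
    return 0;
}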
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
index ee5b45555..b714a7e3f 100644
--- a/src/video_core/morton.h
+++ b/src/video_core/morton.h
@@ -15,7 +15,4 @@ void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat forma
15 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, 15 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
16 u8* buffer, u8* addr); 16 u8* buffer, u8* addr);
17 17
18void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
19 u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data);
20
21} // namespace VideoCore 18} // namespace VideoCore
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 5ee4f8e8e..6b3f2d50a 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -34,6 +34,9 @@ public:
34 /// Clear the current framebuffer 34 /// Clear the current framebuffer
35 virtual void Clear() = 0; 35 virtual void Clear() = 0;
36 36
37 /// Dispatches a compute shader invocation
38 virtual void DispatchCompute(GPUVAddr code_addr) = 0;
39
37 /// Notify rasterizer that all caches should be flushed to Switch memory 40 /// Notify rasterizer that all caches should be flushed to Switch memory
38 virtual void FlushAll() = 0; 41 virtual void FlushAll() = 0;
39 42
@@ -47,6 +50,12 @@ public:
47 /// and invalidated 50 /// and invalidated
48 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; 51 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
49 52
53 /// Notify the rasterizer to send all written commands to the host GPU.
54 virtual void FlushCommands() = 0;
55
56 /// Notify rasterizer that a frame is about to finish
57 virtual void TickFrame() = 0;
58
50 /// Attempt to use a faster method to perform a surface copy 59 /// Attempt to use a faster method to perform a surface copy
51 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 60 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
52 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 61 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 1d54c3723..af1bebc4f 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -36,8 +36,7 @@ public:
36 virtual ~RendererBase(); 36 virtual ~RendererBase();
37 37
38 /// Swap buffers (render frame) 38 /// Swap buffers (render frame)
39 virtual void SwapBuffers( 39 virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
40 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
41 40
42 /// Initialize the renderer 41 /// Initialize the renderer
43 virtual bool Init() = 0; 42 virtual bool Init() = 0;
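SwapBuffers now takes const Tegra::FramebufferConfig* instead of std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>>: both encode "maybe a framebuffer", but the raw pointer is the simpler vocabulary type and is cheaper to move through the command queue. A sketch of how call sites read after the change (the struct is trimmed to one field for illustration):

#include <cstdio>

struct FramebufferConfig {
    int width;
};

// nullptr plays the role std::nullopt used to: "no new frame to present".
void SwapBuffers(const FramebufferConfig* framebuffer) {
    if (framebuffer != nullptr) {
        std::printf("present frame, width=%d\n", framebuffer->width);
    } else {
        std::printf("nothing to present\n");
    }
}

int main() {
    const FramebufferConfig config{1280};
    SwapBuffers(&config);
    SwapBuffers(nullptr);
    return 0;
}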
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 2b9bd142e..f8a807c84 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -2,103 +2,71 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring>
6#include <memory> 5#include <memory>
7 6
8#include "common/alignment.h" 7#include <glad/glad.h>
9#include "core/core.h" 8
10#include "video_core/memory_manager.h" 9#include "common/assert.h"
10#include "common/microprofile.h"
11#include "video_core/rasterizer_interface.h"
11#include "video_core/renderer_opengl/gl_buffer_cache.h" 12#include "video_core/renderer_opengl/gl_buffer_cache.h"
12#include "video_core/renderer_opengl/gl_rasterizer.h" 13#include "video_core/renderer_opengl/gl_rasterizer.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h"
13 15
14namespace OpenGL { 16namespace OpenGL {
15 17
16CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, 18MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
17 std::size_t alignment, u8* host_ptr) 19
18 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, 20CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size)
19 alignment{alignment} {} 21 : VideoCommon::BufferBlock{cache_addr, size} {
20 22 gl_buffer.Create();
21OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) 23 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
22 : RasterizerCache{rasterizer}, stream_buffer(size, true) {}
23
24GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment,
25 bool cache) {
26 std::lock_guard lock{mutex};
27 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
28
29 // Cache management is a big overhead, so only cache entries with a given size.
30 // TODO: Figure out which size is the best for given games.
31 cache &= size >= 2048;
32
33 const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
34 if (cache) {
35 auto entry = TryGet(host_ptr);
36 if (entry) {
37 if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
38 return entry->GetOffset();
39 }
40 Unregister(entry);
41 }
42 }
43
44 AlignBuffer(alignment);
45 const GLintptr uploaded_offset = buffer_offset;
46
47 if (!host_ptr) {
48 return uploaded_offset;
49 }
50
51 std::memcpy(buffer_ptr, host_ptr, size);
52 buffer_ptr += size;
53 buffer_offset += size;
54
55 if (cache) {
56 auto entry = std::make_shared<CachedBufferEntry>(
57 *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr);
58 Register(entry);
59 }
60
61 return uploaded_offset;
62} 24}
63 25
64GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size, 26CachedBufferBlock::~CachedBufferBlock() = default;
65 std::size_t alignment) { 27
66 std::lock_guard lock{mutex}; 28OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
67 AlignBuffer(alignment); 29 std::size_t stream_size)
68 std::memcpy(buffer_ptr, raw_pointer, size); 30 : VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>{
69 const GLintptr uploaded_offset = buffer_offset; 31 rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {}
32
33OGLBufferCache::~OGLBufferCache() = default;
70 34
71 buffer_ptr += size; 35Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
72 buffer_offset += size; 36 return std::make_shared<CachedBufferBlock>(cache_addr, size);
73 return uploaded_offset;
74} 37}
75 38
76bool OGLBufferCache::Map(std::size_t max_size) { 39void OGLBufferCache::WriteBarrier() {
77 bool invalidate; 40 glMemoryBarrier(GL_ALL_BARRIER_BITS);
78 std::tie(buffer_ptr, buffer_offset_base, invalidate) = 41}
79 stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); 42
80 buffer_offset = buffer_offset_base; 43const GLuint* OGLBufferCache::ToHandle(const Buffer& buffer) {
44 return buffer->GetHandle();
45}
81 46
82 if (invalidate) { 47const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) {
83 InvalidateAll(); 48 static const GLuint null_buffer = 0;
84 } 49 return &null_buffer;
85 return invalidate;
86} 50}
87 51
88void OGLBufferCache::Unmap() { 52void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
89 stream_buffer.Unmap(buffer_offset - buffer_offset_base); 53 const u8* data) {
54 glNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset),
55 static_cast<GLsizeiptr>(size), data);
90} 56}
91 57
92GLuint OGLBufferCache::GetHandle() const { 58void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
93 return stream_buffer.GetHandle(); 59 u8* data) {
60 MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
61 glGetNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset),
62 static_cast<GLsizeiptr>(size), data);
94} 63}
95 64
96void OGLBufferCache::AlignBuffer(std::size_t alignment) { 65void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
97 // Align the offset, not the mapped pointer 66 std::size_t dst_offset, std::size_t size) {
98 const GLintptr offset_aligned = 67 glCopyNamedBufferSubData(*src->GetHandle(), *dst->GetHandle(),
99 static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment)); 68 static_cast<GLintptr>(src_offset), static_cast<GLintptr>(dst_offset),
100 buffer_ptr += offset_aligned - buffer_offset; 69 static_cast<GLsizeiptr>(size));
101 buffer_offset = offset_aligned;
102} 70}
103 71
104} // namespace OpenGL 72} // namespace OpenGL
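The rewritten cache delegates generic bookkeeping to VideoCommon::BufferCache and only implements the OpenGL-specific block operations (create, upload, download, copy) with direct-state-access calls. A rough, GL-free sketch of the block-cache shape it plugs into, with std::vector standing in for an OGLBuffer handle; this simplifies the real VideoCommon template considerably:

#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <vector>

using CacheAddr = std::uintptr_t;

// Stand-in for CachedBufferBlock: the real class owns an OGLBuffer instead.
struct BufferBlock {
    BufferBlock(CacheAddr cache_addr, std::size_t size) : cache_addr{cache_addr}, storage(size) {}

    CacheAddr cache_addr;
    std::vector<std::uint8_t> storage;
};

class BufferCache {
public:
    // Fetch the block covering an address, creating (or growing) it on demand;
    // the real template invokes the backend's CreateBlock() override here.
    std::shared_ptr<BufferBlock> GetBlock(CacheAddr cache_addr, std::size_t size) {
        auto& block = blocks[cache_addr];
        if (!block || block->storage.size() < size) {
            block = std::make_shared<BufferBlock>(cache_addr, size);
        }
        return block;
    }

private:
    std::map<CacheAddr, std::shared_ptr<BufferBlock>> blocks;
};

int main() {
    BufferCache cache;
    const auto block = cache.GetBlock(0x1000, 256);
    block->storage[0] = 0xFF; // where UploadBlockData() would glNamedBufferSubData
    return 0;
}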
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index f2347581b..022e7bfa9 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -4,80 +4,63 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <cstddef>
8#include <memory> 7#include <memory>
9#include <tuple>
10 8
11#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/buffer_cache/buffer_cache.h"
12#include "video_core/rasterizer_cache.h" 11#include "video_core/rasterizer_cache.h"
13#include "video_core/renderer_opengl/gl_resource_manager.h" 12#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/renderer_opengl/gl_stream_buffer.h" 13#include "video_core/renderer_opengl/gl_stream_buffer.h"
15 14
15namespace Core {
16class System;
17}
18
16namespace OpenGL { 19namespace OpenGL {
17 20
21class OGLStreamBuffer;
18class RasterizerOpenGL; 22class RasterizerOpenGL;
19 23
20class CachedBufferEntry final : public RasterizerCacheObject { 24class CachedBufferBlock;
21public:
22 explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
23 std::size_t alignment, u8* host_ptr);
24
25 VAddr GetCpuAddr() const override {
26 return cpu_addr;
27 }
28 25
29 std::size_t GetSizeInBytes() const override { 26using Buffer = std::shared_ptr<CachedBufferBlock>;
30 return size;
31 }
32
33 std::size_t GetSize() const {
34 return size;
35 }
36 27
37 GLintptr GetOffset() const { 28class CachedBufferBlock : public VideoCommon::BufferBlock {
38 return offset; 29public:
39 } 30 explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size);
31 ~CachedBufferBlock();
40 32
41 std::size_t GetAlignment() const { 33 const GLuint* GetHandle() const {
42 return alignment; 34 return &gl_buffer.handle;
43 } 35 }
44 36
45private: 37private:
46 VAddr cpu_addr{}; 38 OGLBuffer gl_buffer{};
47 std::size_t size{};
48 GLintptr offset{};
49 std::size_t alignment{};
50}; 39};
51 40
52class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { 41class OGLBufferCache final : public VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer> {
53public: 42public:
54 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size); 43 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
55 44 std::size_t stream_size);
56 /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been 45 ~OGLBufferCache();
57 /// allocated.
58 GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
59 bool cache = true);
60 46
61 /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. 47 const GLuint* GetEmptyBuffer(std::size_t) override;
62 GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4);
63 48
64 bool Map(std::size_t max_size); 49protected:
65 void Unmap(); 50 Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
66 51
67 GLuint GetHandle() const; 52 void WriteBarrier() override;
68 53
69protected: 54 const GLuint* ToHandle(const Buffer& buffer) override;
70 void AlignBuffer(std::size_t alignment);
71 55
72 // We do not have to flush this cache as things in it are never modified by us. 56 void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
73 void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} 57 const u8* data) override;
74 58
75private: 59 void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
76 OGLStreamBuffer stream_buffer; 60 u8* data) override;
77 61
78 u8* buffer_ptr = nullptr; 62 void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
79 GLintptr buffer_offset = 0; 63 std::size_t dst_offset, std::size_t size) override;
80 GLintptr buffer_offset_base = 0;
81}; 64};
82 65
83} // namespace OpenGL 66} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index a48e14d2e..03d434b28 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -24,8 +24,12 @@ T GetInteger(GLenum pname) {
24 24
25Device::Device() { 25Device::Device() {
26 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 26 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
27 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
27 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); 28 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
28 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); 29 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
30 has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
31 GLAD_GL_NV_shader_thread_shuffle;
32 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
29 has_variable_aoffi = TestVariableAoffi(); 33 has_variable_aoffi = TestVariableAoffi();
30 has_component_indexing_bug = TestComponentIndexingBug(); 34 has_component_indexing_bug = TestComponentIndexingBug();
31} 35}
@@ -34,6 +38,8 @@ Device::Device(std::nullptr_t) {
34 uniform_buffer_alignment = 0; 38 uniform_buffer_alignment = 0;
35 max_vertex_attributes = 16; 39 max_vertex_attributes = 16;
36 max_varyings = 15; 40 max_varyings = 15;
41 has_warp_intrinsics = true;
42 has_vertex_viewport_layer = true;
37 has_variable_aoffi = true; 43 has_variable_aoffi = true;
38 has_component_indexing_bug = false; 44 has_component_indexing_bug = false;
39} 45}
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 8c8c93760..3ef7c6dd8 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -18,6 +18,10 @@ public:
18 return uniform_buffer_alignment; 18 return uniform_buffer_alignment;
19 } 19 }
20 20
21 std::size_t GetShaderStorageBufferAlignment() const {
22 return shader_storage_alignment;
23 }
24
21 u32 GetMaxVertexAttributes() const { 25 u32 GetMaxVertexAttributes() const {
22 return max_vertex_attributes; 26 return max_vertex_attributes;
23 } 27 }
@@ -26,6 +30,14 @@ public:
26 return max_varyings; 30 return max_varyings;
27 } 31 }
28 32
33 bool HasWarpIntrinsics() const {
34 return has_warp_intrinsics;
35 }
36
37 bool HasVertexViewportLayer() const {
38 return has_vertex_viewport_layer;
39 }
40
29 bool HasVariableAoffi() const { 41 bool HasVariableAoffi() const {
30 return has_variable_aoffi; 42 return has_variable_aoffi;
31 } 43 }
@@ -39,8 +51,11 @@ private:
39 static bool TestComponentIndexingBug(); 51 static bool TestComponentIndexingBug();
40 52
41 std::size_t uniform_buffer_alignment{}; 53 std::size_t uniform_buffer_alignment{};
54 std::size_t shader_storage_alignment{};
42 u32 max_vertex_attributes{}; 55 u32 max_vertex_attributes{};
43 u32 max_varyings{}; 56 u32 max_varyings{};
57 bool has_warp_intrinsics{};
58 bool has_vertex_viewport_layer{};
44 bool has_variable_aoffi{}; 59 bool has_variable_aoffi{};
45 bool has_component_indexing_bug{}; 60 bool has_component_indexing_bug{};
46}; 61};
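Device acquires its capability flags in two ways: the default constructor queries the live driver (GetInteger plus the GLAD_GL_* extension booleans), while the std::nullptr_t overload fabricates permissive values so shader-pipeline code can run without a GL context. A trimmed sketch of that two-constructor pattern, reduced to one flag:

#include <cstddef>

class Device {
public:
    Device() {
        // The real constructor calls glGetIntegerv and checks GLAD_GL_* extension
        // flags here; keep a conservative default in this context-free sketch.
        has_warp_intrinsics = false;
    }

    explicit Device(std::nullptr_t) {
        // No GL context: assume every feature so decompilation paths stay reachable.
        has_warp_intrinsics = true;
    }

    bool HasWarpIntrinsics() const {
        return has_warp_intrinsics;
    }

private:
    bool has_warp_intrinsics{};
};

int main() {
    const Device offline{nullptr};
    return offline.HasWarpIntrinsics() ? 0 : 1;
}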
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
deleted file mode 100644
index d5e385151..000000000
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <glad/glad.h>
6
7#include "common/logging/log.h"
8#include "core/core.h"
9#include "video_core/memory_manager.h"
10#include "video_core/renderer_opengl/gl_global_cache.h"
11#include "video_core/renderer_opengl/gl_rasterizer.h"
12#include "video_core/renderer_opengl/gl_shader_decompiler.h"
13#include "video_core/renderer_opengl/utils.h"
14
15namespace OpenGL {
16
17CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size)
18 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size},
19 max_size{max_size} {
20 buffer.Create();
21 LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
22}
23
24CachedGlobalRegion::~CachedGlobalRegion() = default;
25
26void CachedGlobalRegion::Reload(u32 size_) {
27 size = size_;
28 if (size > max_size) {
29 size = max_size;
30 LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_,
31 max_size);
32 }
33 glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW);
34}
35
36void CachedGlobalRegion::Flush() {
37 LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr);
38 glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr);
39}
40
41GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
42 const auto search{reserve.find(addr)};
43 if (search == reserve.end()) {
44 return {};
45 }
46 return search->second;
47}
48
49GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr,
50 u32 size) {
51 GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
52 if (!region) {
53 // No reserved surface available, create a new one and reserve it
54 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
55 const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)};
56 ASSERT(cpu_addr);
57
58 region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size);
59 ReserveGlobalRegion(region);
60 }
61 region->Reload(size);
62 return region;
63}
64
65void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
66 reserve.insert_or_assign(region->GetCacheAddr(), std::move(region));
67}
68
69GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
70 : RasterizerCache{rasterizer} {
71 GLint max_ssbo_size_;
72 glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_);
73 max_ssbo_size = static_cast<u32>(max_ssbo_size_);
74}
75
76GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
77 const GLShader::GlobalMemoryEntry& global_region,
78 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
79 std::lock_guard lock{mutex};
80
81 auto& gpu{Core::System::GetInstance().GPU()};
82 auto& memory_manager{gpu.MemoryManager()};
83 const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
84 const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
85 global_region.GetCbufOffset()};
86 const auto actual_addr{memory_manager.Read<u64>(addr)};
87 const auto size{memory_manager.Read<u32>(addr + 8)};
88
89 // Look up global region in the cache based on address
90 const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
91 GlobalRegion region{TryGet(host_ptr)};
92
93 if (!region) {
94 // No global region found - create a new one
95 region = GetUncachedGlobalRegion(actual_addr, host_ptr, size);
96 Register(region);
97 }
98
99 return region;
100}
101
102} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
deleted file mode 100644
index 2d467a240..000000000
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ /dev/null
@@ -1,82 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <unordered_map>
9
10#include <glad/glad.h>
11
12#include "common/assert.h"
13#include "common/common_types.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/rasterizer_cache.h"
16#include "video_core/renderer_opengl/gl_resource_manager.h"
17
18namespace OpenGL {
19
20namespace GLShader {
21class GlobalMemoryEntry;
22}
23
24class RasterizerOpenGL;
25class CachedGlobalRegion;
26using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
27
28class CachedGlobalRegion final : public RasterizerCacheObject {
29public:
30 explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size);
31 ~CachedGlobalRegion();
32
33 VAddr GetCpuAddr() const override {
34 return cpu_addr;
35 }
36
37 std::size_t GetSizeInBytes() const override {
38 return size;
39 }
40
41 /// Gets the GL program handle for the buffer
42 GLuint GetBufferHandle() const {
43 return buffer.handle;
44 }
45
46 /// Reloads the global region from guest memory
47 void Reload(u32 size_);
48
49 void Flush();
50
51private:
52 VAddr cpu_addr{};
53 u8* host_ptr{};
54 u32 size{};
55 u32 max_size{};
56
57 OGLBuffer buffer;
58};
59
60class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
61public:
62 explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);
63
64 /// Gets the current specified shader stage program
65 GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
66 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
67
68protected:
69 void FlushObjectInner(const GlobalRegion& object) override {
70 object->Flush();
71 }
72
73private:
74 GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
75 GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);
76 void ReserveGlobalRegion(GlobalRegion region);
77
78 std::unordered_map<CacheAddr, GlobalRegion> reserve;
79 u32 max_ssbo_size{};
80};
81
82} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f45a3c5ef..bb09ecd52 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -4,6 +4,7 @@
4 4
5#include <algorithm> 5#include <algorithm>
6#include <array> 6#include <array>
7#include <bitset>
7#include <memory> 8#include <memory>
8#include <string> 9#include <string>
9#include <string_view> 10#include <string_view>
@@ -19,7 +20,9 @@
19#include "core/core.h" 20#include "core/core.h"
20#include "core/hle/kernel/process.h" 21#include "core/hle/kernel/process.h"
21#include "core/settings.h" 22#include "core/settings.h"
23#include "video_core/engines/kepler_compute.h"
22#include "video_core/engines/maxwell_3d.h" 24#include "video_core/engines/maxwell_3d.h"
25#include "video_core/memory_manager.h"
23#include "video_core/renderer_opengl/gl_rasterizer.h" 26#include "video_core/renderer_opengl/gl_rasterizer.h"
24#include "video_core/renderer_opengl/gl_shader_cache.h" 27#include "video_core/renderer_opengl/gl_shader_cache.h"
25#include "video_core/renderer_opengl/gl_shader_gen.h" 28#include "video_core/renderer_opengl/gl_shader_gen.h"
@@ -80,16 +83,31 @@ struct DrawParameters {
80 } 83 }
81}; 84};
82 85
86static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
87 const GLShader::ConstBufferEntry& entry) {
88 if (!entry.IsIndirect()) {
89 return entry.GetSize();
90 }
91
92 if (buffer.size > Maxwell::MaxConstBufferSize) {
93 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
94 Maxwell::MaxConstBufferSize);
95 return Maxwell::MaxConstBufferSize;
96 }
97
98 return buffer.size;
99}
100
83RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, 101RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
84 ScreenInfo& info) 102 ScreenInfo& info)
85 : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, 103 : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
86 global_cache{*this}, system{system}, screen_info{info}, 104 system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} {
87 buffer_cache(*this, STREAM_BUFFER_SIZE) {
88 OpenGLState::ApplyDefaultState(); 105 OpenGLState::ApplyDefaultState();
89 106
90 shader_program_manager = std::make_unique<GLShader::ProgramManager>(); 107 shader_program_manager = std::make_unique<GLShader::ProgramManager>();
91 state.draw.shader_program = 0; 108 state.draw.shader_program = 0;
92 state.Apply(); 109 state.Apply();
110 clear_framebuffer.Create();
93 111
94 LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); 112 LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
95 CheckExtensions(); 113 CheckExtensions();
@@ -109,10 +127,10 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
109 auto& gpu = system.GPU().Maxwell3D(); 127 auto& gpu = system.GPU().Maxwell3D();
110 const auto& regs = gpu.regs; 128 const auto& regs = gpu.regs;
111 129
112 if (!gpu.dirty_flags.vertex_attrib_format) { 130 if (!gpu.dirty.vertex_attrib_format) {
113 return state.draw.vertex_array; 131 return state.draw.vertex_array;
114 } 132 }
115 gpu.dirty_flags.vertex_attrib_format = false; 133 gpu.dirty.vertex_attrib_format = false;
116 134
117 MICROPROFILE_SCOPE(OpenGL_VAO); 135 MICROPROFILE_SCOPE(OpenGL_VAO);
118 136
@@ -129,8 +147,6 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
129 state.draw.vertex_array = vao; 147 state.draw.vertex_array = vao;
130 state.ApplyVertexArrayState(); 148 state.ApplyVertexArrayState();
131 149
132 glVertexArrayElementBuffer(vao, buffer_cache.GetHandle());
133
134 // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. 150 // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
135 // Enables the first 16 vertex attributes always, as we don't know which ones are actually 151 // Enables the first 16 vertex attributes always, as we don't know which ones are actually
136 // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 152 // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16
@@ -168,7 +184,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
168 } 184 }
169 185
170 // Rebinding the VAO invalidates the vertex buffer bindings. 186 // Rebinding the VAO invalidates the vertex buffer bindings.
171 gpu.dirty_flags.vertex_array.set(); 187 gpu.dirty.ResetVertexArrays();
172 188
173 state.draw.vertex_array = vao_entry.handle; 189 state.draw.vertex_array = vao_entry.handle;
174 return vao_entry.handle; 190 return vao_entry.handle;
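For reference, the hunks above migrate from a `dirty_flags` bitset to a `dirty` struct of plain bools that are tested and cleared per draw. A minimal sketch of that check-then-clear pattern follows; the names are illustrative, not yuzu's actual types.

#include <array>

// Hypothetical illustration of the dirty-tracking pattern used above.
struct DirtyState {
    bool vertex_attrib_format = true; // start dirty so the first draw syncs everything
    std::array<bool, 32> vertex_array{};

    void ResetVertexArrays() {
        vertex_array.fill(true); // e.g. after a VAO rebind invalidates buffer bindings
    }
};

template <typename F>
void SyncIfDirty(bool& flag, F&& sync) {
    if (!flag) {
        return;   // host state already matches the guest registers
    }
    flag = false; // clear before syncing so re-dirtying inside sync() is preserved
    sync();
}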
@@ -176,17 +192,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
176 192
177void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { 193void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
178 auto& gpu = system.GPU().Maxwell3D(); 194 auto& gpu = system.GPU().Maxwell3D();
179 const auto& regs = gpu.regs; 195 if (!gpu.dirty.vertex_array_buffers)
180
181 if (gpu.dirty_flags.vertex_array.none())
182 return; 196 return;
197 gpu.dirty.vertex_array_buffers = false;
198
199 const auto& regs = gpu.regs;
183 200
184 MICROPROFILE_SCOPE(OpenGL_VB); 201 MICROPROFILE_SCOPE(OpenGL_VB);
185 202
186 // Upload all guest vertex arrays sequentially to our buffer 203 // Upload all guest vertex arrays sequentially to our buffer
187 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { 204 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
188 if (!gpu.dirty_flags.vertex_array[index]) 205 if (!gpu.dirty.vertex_array[index])
189 continue; 206 continue;
207 gpu.dirty.vertex_array[index] = false;
208 gpu.dirty.vertex_instance[index] = false;
190 209
191 const auto& vertex_array = regs.vertex_array[index]; 210 const auto& vertex_array = regs.vertex_array[index];
192 if (!vertex_array.IsEnabled()) 211 if (!vertex_array.IsEnabled())
@@ -197,11 +216,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
197 216
198 ASSERT(end > start); 217 ASSERT(end > start);
199 const u64 size = end - start + 1; 218 const u64 size = end - start + 1;
200 const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size); 219 const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
201 220
202 // Bind the vertex array to the buffer at the current offset. 221 // Bind the vertex array to the buffer at the current offset.
203 glVertexArrayVertexBuffer(vao, index, buffer_cache.GetHandle(), vertex_buffer_offset, 222 vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset,
204 vertex_array.stride); 223 vertex_array.stride);
205 224
206 if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { 225 if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
207 // Enable vertex buffer instancing with the specified divisor. 226 // Enable vertex buffer instancing with the specified divisor.
@@ -211,11 +230,47 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
211 glVertexArrayBindingDivisor(vao, index, 0); 230 glVertexArrayBindingDivisor(vao, index, 0);
212 } 231 }
213 } 232 }
233}
214 234
215 gpu.dirty_flags.vertex_array.reset(); 235void RasterizerOpenGL::SetupVertexInstances(GLuint vao) {
236 auto& gpu = system.GPU().Maxwell3D();
237
238 if (!gpu.dirty.vertex_instances)
239 return;
240 gpu.dirty.vertex_instances = false;
241
242 const auto& regs = gpu.regs;
243 // Sync the instancing divisor of each guest vertex array
244 for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
245 if (!gpu.dirty.vertex_instance[index])
246 continue;
247
248 gpu.dirty.vertex_instance[index] = false;
249
250 if (regs.instanced_arrays.IsInstancingEnabled(index) &&
251 regs.vertex_array[index].divisor != 0) {
252 // Enable vertex buffer instancing with the specified divisor.
253 glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor);
254 } else {
255 // Disable the vertex buffer instancing.
256 glVertexArrayBindingDivisor(vao, index, 0);
257 }
258 }
216} 259}
217 260
218DrawParameters RasterizerOpenGL::SetupDraw() { 261GLintptr RasterizerOpenGL::SetupIndexBuffer() {
262 if (accelerate_draw != AccelDraw::Indexed) {
263 return 0;
264 }
265 MICROPROFILE_SCOPE(OpenGL_Index);
266 const auto& regs = system.GPU().Maxwell3D().regs;
267 const std::size_t size = CalculateIndexBufferSize();
268 const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
269 vertex_array_pushbuffer.SetIndexBuffer(buffer);
270 return offset;
271}
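Note that the direct glVertexArray* calls are gone from these setup functions: `UploadMemory` now returns which GL buffer holds the data, and that handle is only final once the stream buffer is unmapped, so bindings are recorded in `vertex_array_pushbuffer` and issued later. A sketch of such a push buffer, assuming a DSA-capable (OpenGL 4.5) context and not the actual `VertexArrayPushBuffer` implementation:

#include <glad/glad.h>
#include <vector>

// Sketch: queue vertex/index buffer bindings, then flush them once uploads finish.
class VertexArrayPushBufferSketch {
public:
    void Setup(GLuint vao_) {
        vao = vao_;
        vertex_buffers.clear();
        index_buffer = 0;
        has_index_buffer = false;
    }

    void SetVertexBuffer(GLuint index, GLuint buffer, GLintptr offset, GLsizei stride) {
        vertex_buffers.push_back({index, buffer, offset, stride});
    }

    void SetIndexBuffer(GLuint buffer) {
        index_buffer = buffer;
        has_index_buffer = true;
    }

    void Bind() {
        // Issue all deferred bindings in one go, now that buffer handles are final.
        for (const auto& entry : vertex_buffers) {
            glVertexArrayVertexBuffer(vao, entry.index, entry.buffer, entry.offset,
                                      entry.stride);
        }
        if (has_index_buffer) {
            glVertexArrayElementBuffer(vao, index_buffer);
        }
    }

private:
    struct Entry {
        GLuint index;
        GLuint buffer;
        GLintptr offset;
        GLsizei stride;
    };
    GLuint vao = 0;
    std::vector<Entry> vertex_buffers;
    GLuint index_buffer = 0;
    bool has_index_buffer = false;
};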
272
273DrawParameters RasterizerOpenGL::SetupDraw(GLintptr index_buffer_offset) {
219 const auto& gpu = system.GPU().Maxwell3D(); 274 const auto& gpu = system.GPU().Maxwell3D();
220 const auto& regs = gpu.regs; 275 const auto& regs = gpu.regs;
221 const bool is_indexed = accelerate_draw == AccelDraw::Indexed; 276 const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
@@ -227,11 +282,9 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
227 params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); 282 params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
228 283
229 if (is_indexed) { 284 if (is_indexed) {
230 MICROPROFILE_SCOPE(OpenGL_Index);
231 params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); 285 params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
232 params.count = regs.index_array.count; 286 params.count = regs.index_array.count;
233 params.index_buffer_offset = 287 params.index_buffer_offset = index_buffer_offset;
234 buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize());
235 params.base_vertex = static_cast<GLint>(regs.vb_element_base); 288 params.base_vertex = static_cast<GLint>(regs.vb_element_base);
236 } else { 289 } else {
237 params.count = regs.vertex_buffer.count; 290 params.count = regs.vertex_buffer.count;
@@ -247,10 +300,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
247 BaseBindings base_bindings; 300 BaseBindings base_bindings;
248 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 301 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
249 302
250 // Prepare packed bindings
251 bind_ubo_pushbuffer.Setup(base_bindings.cbuf);
252 bind_ssbo_pushbuffer.Setup(base_bindings.gmem);
253
254 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 303 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
255 const auto& shader_config = gpu.regs.shader_config[index]; 304 const auto& shader_config = gpu.regs.shader_config[index];
256 const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; 305 const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -271,18 +320,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
271 320
272 GLShader::MaxwellUniformData ubo{}; 321 GLShader::MaxwellUniformData ubo{};
273 ubo.SetFromRegs(gpu, stage); 322 ubo.SetFromRegs(gpu, stage);
274 const GLintptr offset = 323 const auto [buffer, offset] =
275 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); 324 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
276 325
277 // Bind the emulation info buffer 326 // Bind the emulation info buffer
278 bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, 327 bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo)));
279 static_cast<GLsizeiptr>(sizeof(ubo)));
280 328
281 Shader shader{shader_cache.GetStageProgram(program)}; 329 Shader shader{shader_cache.GetStageProgram(program)};
282 330
283 const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)}; 331 const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
284 SetupDrawConstBuffers(stage_enum, shader); 332 SetupDrawConstBuffers(stage_enum, shader);
285 SetupGlobalRegions(stage_enum, shader); 333 SetupDrawGlobalMemory(stage_enum, shader);
286 const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)}; 334 const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)};
287 335
288 const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; 336 const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
@@ -321,12 +369,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
321 base_bindings = next_bindings; 369 base_bindings = next_bindings;
322 } 370 }
323 371
324 bind_ubo_pushbuffer.Bind();
325 bind_ssbo_pushbuffer.Bind();
326
327 SyncClipEnabled(clip_distances); 372 SyncClipEnabled(clip_distances);
328 373
329 gpu.dirty_flags.shaders = false; 374 gpu.dirty.shaders = false;
330} 375}
331 376
332std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { 377std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -409,13 +454,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
409 454
410 const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents, 455 const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
411 single_color_target}; 456 single_color_target};
412 if (fb_config_state == current_framebuffer_config_state && 457 if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) {
413 gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
414 // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or 458 // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
415 // single color targets). This is done because the guest registers may not change but the 459 // single color targets). This is done because the guest registers may not change but the
416 // host framebuffer may contain different attachments 460 // host framebuffer may contain different attachments
417 return current_depth_stencil_usage; 461 return current_depth_stencil_usage;
418 } 462 }
463 gpu.dirty.render_settings = false;
419 current_framebuffer_config_state = fb_config_state; 464 current_framebuffer_config_state = fb_config_state;
420 465
421 texture_cache.GuardRenderTargets(true); 466 texture_cache.GuardRenderTargets(true);
@@ -504,13 +549,71 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
504 return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; 549 return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
505} 550}
506 551
552void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
553 bool using_depth_fb, bool using_stencil_fb) {
554 auto& gpu = system.GPU().Maxwell3D();
555 const auto& regs = gpu.regs;
556
557 texture_cache.GuardRenderTargets(true);
558 View color_surface{};
559 if (using_color_fb) {
560 color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false);
561 }
562 View depth_surface{};
563 if (using_depth_fb || using_stencil_fb) {
564 depth_surface = texture_cache.GetDepthBufferSurface(false);
565 }
566 texture_cache.GuardRenderTargets(false);
567
568 current_state.draw.draw_framebuffer = clear_framebuffer.handle;
569 current_state.ApplyFramebufferState();
570
571 if (color_surface) {
572 color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
573 } else {
574 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
575 }
576
577 if (depth_surface) {
578 const auto& params = depth_surface->GetSurfaceParams();
579 switch (params.type) {
580 case VideoCore::Surface::SurfaceType::Depth: {
581 depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
582 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
583 break;
584 }
585 case VideoCore::Surface::SurfaceType::DepthStencil: {
586 depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
587 break;
588 }
589 default: { UNIMPLEMENTED(); }
590 }
591 } else {
592 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
593 0);
594 }
595}
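`ConfigureClearFramebuffer` attaches only the surfaces being cleared to a dedicated scratch FBO and explicitly detaches the rest, so a color-only clear cannot touch the zeta surface and the cleared target always sits at attachment 0. A hedged sketch of the same idea, with a hypothetical helper name:

#include <glad/glad.h>

// Sketch (hypothetical helper): attach a single color texture to a scratch FBO so
// the clear can target drawbuffer 0 regardless of which guest render target it is.
void PrepareClearFramebuffer(GLuint scratch_fbo, GLuint color_texture) {
    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, scratch_fbo);
    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
                           color_texture, 0);
    // Binding texture 0 detaches the attachment point; depth/stencil stay untouched.
    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
                           GL_TEXTURE_2D, 0, 0);
}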
596
507void RasterizerOpenGL::Clear() { 597void RasterizerOpenGL::Clear() {
508 const auto& regs = system.GPU().Maxwell3D().regs; 598 const auto& maxwell3d = system.GPU().Maxwell3D();
599
600 if (!maxwell3d.ShouldExecute()) {
601 return;
602 }
603
604 const auto& regs = maxwell3d.regs;
509 bool use_color{}; 605 bool use_color{};
510 bool use_depth{}; 606 bool use_depth{};
511 bool use_stencil{}; 607 bool use_stencil{};
512 608
513 OpenGLState clear_state; 609 OpenGLState prev_state{OpenGLState::GetCurState()};
610 SCOPE_EXIT({
611 prev_state.AllDirty();
612 prev_state.Apply();
613 });
614
615 OpenGLState clear_state{OpenGLState::GetCurState()};
616 clear_state.SetDefaultViewports();
514 if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || 617 if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
515 regs.clear_buffers.A) { 618 regs.clear_buffers.A) {
516 use_color = true; 619 use_color = true;
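`Clear()` now snapshots the live `OpenGLState` and restores it through `SCOPE_EXIT` on every return path. A minimal sketch of the RAII idea behind that macro (not yuzu's actual `SCOPE_EXIT` implementation):

#include <utility>

// Minimal scope-exit helper: runs a callable when the enclosing scope unwinds.
template <typename F>
class ScopeExit {
public:
    explicit ScopeExit(F&& func_) : func{std::forward<F>(func_)} {}
    ~ScopeExit() {
        func(); // executes on normal return and during exception unwinding alike
    }
    ScopeExit(const ScopeExit&) = delete;
    ScopeExit& operator=(const ScopeExit&) = delete;

private:
    F func;
};

// Usage mirroring Clear(): restore the previous state on every return path.
// ScopeExit restore{[&] { prev_state.AllDirty(); prev_state.Apply(); }};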
@@ -530,6 +633,7 @@ void RasterizerOpenGL::Clear() {
530 // true. 633 // true.
531 clear_state.depth.test_enabled = true; 634 clear_state.depth.test_enabled = true;
532 clear_state.depth.test_func = GL_ALWAYS; 635 clear_state.depth.test_func = GL_ALWAYS;
636 clear_state.depth.write_mask = GL_TRUE;
533 } 637 }
534 if (regs.clear_buffers.S) { 638 if (regs.clear_buffers.S) {
535 ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); 639 ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
@@ -566,8 +670,9 @@ void RasterizerOpenGL::Clear() {
566 return; 670 return;
567 } 671 }
568 672
569 const auto [clear_depth, clear_stencil] = ConfigureFramebuffers( 673 ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);
570 clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value()); 674
675 SyncViewport(clear_state);
571 if (regs.clear_flags.scissor) { 676 if (regs.clear_flags.scissor) {
572 SyncScissorTest(clear_state); 677 SyncScissorTest(clear_state);
573 } 678 }
@@ -576,21 +681,18 @@ void RasterizerOpenGL::Clear() {
576 clear_state.EmulateViewportWithScissor(); 681 clear_state.EmulateViewportWithScissor();
577 } 682 }
578 683
579 clear_state.ApplyColorMask(); 684 clear_state.AllDirty();
580 clear_state.ApplyDepth(); 685 clear_state.Apply();
581 clear_state.ApplyStencilTest();
582 clear_state.ApplyViewport();
583 clear_state.ApplyFramebufferState();
584 686
585 if (use_color) { 687 if (use_color) {
586 glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); 688 glClearBufferfv(GL_COLOR, 0, regs.clear_color);
587 } 689 }
588 690
589 if (clear_depth && clear_stencil) { 691 if (use_depth && use_stencil) {
590 glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); 692 glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
591 } else if (clear_depth) { 693 } else if (use_depth) {
592 glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth); 694 glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
593 } else if (clear_stencil) { 695 } else if (use_stencil) {
594 glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil); 696 glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
595 } 697 }
596} 698}
@@ -601,7 +703,10 @@ void RasterizerOpenGL::DrawArrays() {
601 703
602 MICROPROFILE_SCOPE(OpenGL_Drawing); 704 MICROPROFILE_SCOPE(OpenGL_Drawing);
603 auto& gpu = system.GPU().Maxwell3D(); 705 auto& gpu = system.GPU().Maxwell3D();
604 const auto& regs = gpu.regs; 706
707 if (!gpu.ShouldExecute()) {
708 return;
709 }
605 710
606 SyncColorMask(); 711 SyncColorMask();
607 SyncFragmentColorClampState(); 712 SyncFragmentColorClampState();
@@ -634,26 +739,47 @@ void RasterizerOpenGL::DrawArrays() {
634 Maxwell::MaxShaderStage; 739 Maxwell::MaxShaderStage;
635 740
636 // Add space for at least 18 constant buffers 741 // Add space for at least 18 constant buffers
637 buffer_size += 742 buffer_size += Maxwell::MaxConstBuffers *
638 Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); 743 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
639 744
640 const bool invalidate = buffer_cache.Map(buffer_size); 745 // Map the stream buffer for this draw's uploads.
641 if (invalidate) { 746 buffer_cache.Map(buffer_size);
642 // As all cached buffers are invalidated, we need to recheck their state.
643 gpu.dirty_flags.vertex_array.set();
644 }
645 747
748 // Prepare vertex array format.
646 const GLuint vao = SetupVertexFormat(); 749 const GLuint vao = SetupVertexFormat();
750 vertex_array_pushbuffer.Setup(vao);
751
752 // Upload vertex and index data.
647 SetupVertexBuffer(vao); 753 SetupVertexBuffer(vao);
754 SetupVertexInstances(vao);
755 const GLintptr index_buffer_offset = SetupIndexBuffer();
756
757 // Set up the draw parameters; DispatchDraw() automatically chooses which glDraw* method to use.
758 const DrawParameters params = SetupDraw(index_buffer_offset);
648 759
649 DrawParameters params = SetupDraw(); 760 // Prepare packed bindings.
761 bind_ubo_pushbuffer.Setup(0);
762 bind_ssbo_pushbuffer.Setup(0);
763
764 // Set up shaders and the resources they use.
650 texture_cache.GuardSamplers(true); 765 texture_cache.GuardSamplers(true);
651 SetupShaders(params.primitive_mode); 766 SetupShaders(params.primitive_mode);
652 texture_cache.GuardSamplers(false); 767 texture_cache.GuardSamplers(false);
653 768
654 ConfigureFramebuffers(state); 769 ConfigureFramebuffers(state);
655 770
656 buffer_cache.Unmap(); 771 // Signal the buffer cache that no further uploads are coming for this draw.
772 const bool invalidate = buffer_cache.Unmap();
773
774 // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL.
775 vertex_array_pushbuffer.Bind();
776 bind_ubo_pushbuffer.Bind();
777 bind_ssbo_pushbuffer.Bind();
778
779 if (invalidate) {
780 // As all cached buffers are invalidated, we need to recheck their state.
781 gpu.dirty.ResetVertexArrays();
782 }
657 783
658 shader_program_manager->ApplyTo(state); 784 shader_program_manager->ApplyTo(state);
659 state.Apply(); 785 state.Apply();
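The reworked `DrawArrays` splits each draw into an upload phase (`Map` ... `Unmap`) and a bind phase (the three `Bind()` calls), because growing the stream buffer during uploads can recreate the underlying GL buffer. A self-contained sketch of that protocol, with hypothetical names and placeholder bookkeeping:

#include <cstddef>
#include <utility>

// Sketch: Map() reserves space and may reallocate the GL buffer; Unmap() reports
// whether that happened, in which case every cached binding is stale.
class StreamBufferSketch {
public:
    void Map(std::size_t size) {
        if (size > capacity) {
            capacity = size * 2;
            ++handle;           // the backing GL buffer was recreated
            invalidated = true; // cached VAO/UBO bindings now point at a dead buffer
        }
    }

    // Returns (buffer handle, offset); callers must defer binding until Unmap().
    std::pair<unsigned, std::size_t> Upload(std::size_t size) {
        const std::size_t offset = cursor;
        cursor += size;
        return {handle, offset};
    }

    bool Unmap() {
        return std::exchange(invalidated, false);
    }

private:
    unsigned handle = 1;
    std::size_t cursor = 0;
    std::size_t capacity = 0;
    bool invalidated = false;
};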
@@ -665,6 +791,46 @@ void RasterizerOpenGL::DrawArrays() {
665 params.DispatchDraw(); 791 params.DispatchDraw();
666 792
667 accelerate_draw = AccelDraw::Disabled; 793 accelerate_draw = AccelDraw::Disabled;
794 gpu.dirty.memory_general = false;
795}
796
797void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
798 if (!GLAD_GL_ARB_compute_variable_group_size) {
799 LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the "
800 "lack of GL_ARB_compute_variable_group_size");
801 return;
802 }
803
804 auto kernel = shader_cache.GetComputeKernel(code_addr);
805 const auto [program, next_bindings] = kernel->GetProgramHandle({});
806 state.draw.shader_program = program;
807 state.draw.program_pipeline = 0;
808
809 const std::size_t buffer_size =
810 Tegra::Engines::KeplerCompute::NumConstBuffers *
811 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
812 buffer_cache.Map(buffer_size);
813
814 bind_ubo_pushbuffer.Setup(0);
815 bind_ssbo_pushbuffer.Setup(0);
816
817 SetupComputeConstBuffers(kernel);
818 SetupComputeGlobalMemory(kernel);
819
820 // TODO(Rodrigo): Bind images and samplers
821
822 buffer_cache.Unmap();
823
824 bind_ubo_pushbuffer.Bind();
825 bind_ssbo_pushbuffer.Bind();
826
827 state.ApplyShaderProgram();
828 state.ApplyProgramPipeline();
829
830 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
831 glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y,
832 launch_desc.grid_dim_z, launch_desc.block_dim_x,
833 launch_desc.block_dim_y, launch_desc.block_dim_z);
668} 834}
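`DispatchCompute` requires GL_ARB_compute_variable_group_size so the guest's block dimensions can be passed at dispatch time rather than baked into the shader (hence the `layout (local_size_variable) in;` emitted in gl_shader_cache.cpp below). A minimal standalone sketch:

#include <glad/glad.h>

// Sketch: dispatch a compute program whose work-group size is chosen at dispatch
// time. Requires GL_ARB_compute_variable_group_size and a shader declaring
// "layout (local_size_variable) in;".
void DispatchSketch(GLuint program, GLuint grid_x, GLuint grid_y, GLuint grid_z,
                    GLuint block_x, GLuint block_y, GLuint block_z) {
    if (!GLAD_GL_ARB_compute_variable_group_size) {
        return; // unsupported: report and bail out, as the rasterizer does above
    }
    glUseProgram(program);
    glDispatchComputeGroupSizeARB(grid_x, grid_y, grid_z, block_x, block_y, block_z);
}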
669 835
670void RasterizerOpenGL::FlushAll() {} 836void RasterizerOpenGL::FlushAll() {}
@@ -675,7 +841,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
675 return; 841 return;
676 } 842 }
677 texture_cache.FlushRegion(addr, size); 843 texture_cache.FlushRegion(addr, size);
678 global_cache.FlushRegion(addr, size); 844 buffer_cache.FlushRegion(addr, size);
679} 845}
680 846
681void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { 847void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
@@ -685,7 +851,6 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
685 } 851 }
686 texture_cache.InvalidateRegion(addr, size); 852 texture_cache.InvalidateRegion(addr, size);
687 shader_cache.InvalidateRegion(addr, size); 853 shader_cache.InvalidateRegion(addr, size);
688 global_cache.InvalidateRegion(addr, size);
689 buffer_cache.InvalidateRegion(addr, size); 854 buffer_cache.InvalidateRegion(addr, size);
690} 855}
691 856
@@ -696,6 +861,14 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
696 InvalidateRegion(addr, size); 861 InvalidateRegion(addr, size);
697} 862}
698 863
864void RasterizerOpenGL::FlushCommands() {
865 glFlush();
866}
867
868void RasterizerOpenGL::TickFrame() {
869 buffer_cache.TickFrame();
870}
871
699bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 872bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
700 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 873 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
701 const Tegra::Engines::Fermi2D::Config& copy_config) { 874 const Tegra::Engines::Fermi2D::Config& copy_config) {
@@ -737,14 +910,25 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
737void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 910void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
738 const Shader& shader) { 911 const Shader& shader) {
739 MICROPROFILE_SCOPE(OpenGL_UBO); 912 MICROPROFILE_SCOPE(OpenGL_UBO);
740 const auto stage_index = static_cast<std::size_t>(stage); 913 const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
741 const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; 914 const auto& shader_stage = stages[static_cast<std::size_t>(stage)];
742 const auto& entries = shader->GetShaderEntries().const_buffers; 915 for (const auto& entry : shader->GetShaderEntries().const_buffers) {
916 const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
917 SetupConstBuffer(buffer, entry);
918 }
919}
743 920
744 // Upload only the enabled buffers from the 16 constbuffers of each shader stage 921void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
745 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { 922 MICROPROFILE_SCOPE(OpenGL_UBO);
746 const auto& entry = entries[bindpoint]; 923 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
747 SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); 924 for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
925 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
926 const std::bitset<8> mask = launch_desc.memory_config.const_buffer_enable_mask.Value();
927 Tegra::Engines::ConstBufferInfo buffer;
928 buffer.address = config.Address();
929 buffer.size = config.size;
930 buffer.enabled = mask[entry.GetIndex()];
931 SetupConstBuffer(buffer, entry);
748 } 932 }
749} 933}
750 934
@@ -752,49 +936,52 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
752 const GLShader::ConstBufferEntry& entry) { 936 const GLShader::ConstBufferEntry& entry) {
753 if (!buffer.enabled) { 937 if (!buffer.enabled) {
754 // Set values to zero to unbind buffers 938 // Set values to zero to unbind buffers
755 bind_ubo_pushbuffer.Push(0, 0, 0); 939 bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
756 return; 940 return;
757 } 941 }
758 942
759 std::size_t size;
760 if (entry.IsIndirect()) {
761 // Buffer is accessed indirectly, so upload the entire thing
762 size = buffer.size;
763
764 if (size > MaxConstbufferSize) {
765 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
766 MaxConstbufferSize);
767 size = MaxConstbufferSize;
768 }
769 } else {
770 // Buffer is accessed directly, upload just what we use
771 size = entry.GetSize();
772 }
773
774 // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 943 // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
775 // UBO alignment requirements. 944 // UBO alignment requirements.
776 size = Common::AlignUp(size, sizeof(GLvec4)); 945 const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
777 ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big");
778 946
779 const std::size_t alignment = device.GetUniformBufferAlignment(); 947 const auto alignment = device.GetUniformBufferAlignment();
780 const GLintptr offset = buffer_cache.UploadMemory(buffer.address, size, alignment); 948 const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment);
781 bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, size); 949 bind_ubo_pushbuffer.Push(cbuf, offset, size);
782} 950}
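The vec4 rounding above satisfies std140 layout, while the `alignment` argument keeps each uploaded range on a GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT boundary as glBindBufferRange requires. A sketch of what an alignment helper like `Common::AlignUp` does, assuming a power-of-two alignment:

#include <cstddef>

// Round `value` up to a multiple of `align` (power of two), e.g. AlignUp(37, 16) == 48.
constexpr std::size_t AlignUp(std::size_t value, std::size_t align) {
    return (value + align - 1) & ~(align - 1);
}

static_assert(AlignUp(37, sizeof(float) * 4) == 48); // vec4 (std140) rounding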
783 951
784void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 952void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
785 const Shader& shader) { 953 const Shader& shader) {
786 const auto& entries = shader->GetShaderEntries().global_memory_entries; 954 auto& gpu{system.GPU()};
787 for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { 955 auto& memory_manager{gpu.MemoryManager()};
788 const auto& entry{entries[bindpoint]}; 956 const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
789 const auto& region{global_cache.GetGlobalRegion(entry, stage)}; 957 for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
790 if (entry.IsWritten()) { 958 const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
791 region->MarkAsModified(true, global_cache); 959 const auto gpu_addr{memory_manager.Read<u64>(addr)};
792 } 960 const auto size{memory_manager.Read<u32>(addr + 8)};
793 bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0, 961 SetupGlobalMemory(entry, gpu_addr, size);
794 static_cast<GLsizeiptr>(region->GetSizeInBytes())); 962 }
963}
964
965void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
966 auto& gpu{system.GPU()};
967 auto& memory_manager{gpu.MemoryManager()};
968 const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
969 for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) {
970 const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
971 const auto gpu_addr{memory_manager.Read<u64>(addr)};
972 const auto size{memory_manager.Read<u32>(addr + 8)};
973 SetupGlobalMemory(entry, gpu_addr, size);
795 } 974 }
796} 975}
797 976
977void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry,
978 GPUVAddr gpu_addr, std::size_t size) {
979 const auto alignment{device.GetShaderStorageBufferAlignment()};
980 const auto [ssbo, buffer_offset] =
981 buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten());
982 bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
983}
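Both global-memory paths above locate each buffer through a small descriptor stored in guest const-buffer memory: a 64-bit GPU address followed by a 32-bit size at offset 8, exactly the two `Read` calls in the loops. Expressed as a struct for clarity:

#include <cstdint>

// Layout of the guest-side record read above from const-buffer memory.
struct GlobalBufferDescriptor {
    std::uint64_t gpu_addr; // memory_manager.Read<u64>(addr)
    std::uint32_t size;     // memory_manager.Read<u32>(addr + 8)
};
static_assert(sizeof(GlobalBufferDescriptor) >= 12);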
984
798TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, 985TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
799 BaseBindings base_bindings) { 986 BaseBindings base_bindings) {
800 MICROPROFILE_SCOPE(OpenGL_Texture); 987 MICROPROFILE_SCOPE(OpenGL_Texture);
@@ -883,10 +1070,11 @@ void RasterizerOpenGL::SyncClipCoef() {
883} 1070}
884 1071
885void RasterizerOpenGL::SyncCullMode() { 1072void RasterizerOpenGL::SyncCullMode() {
886 const auto& regs = system.GPU().Maxwell3D().regs; 1073 auto& maxwell3d = system.GPU().Maxwell3D();
887 1074
888 state.cull.enabled = regs.cull.enabled != 0; 1075 const auto& regs = maxwell3d.regs;
889 1076
1077 state.cull.enabled = regs.cull.enabled != 0;
890 if (state.cull.enabled) { 1078 if (state.cull.enabled) {
891 state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); 1079 state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
892 state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); 1080 state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
@@ -919,16 +1107,21 @@ void RasterizerOpenGL::SyncDepthTestState() {
919 state.depth.test_enabled = regs.depth_test_enable != 0; 1107 state.depth.test_enabled = regs.depth_test_enable != 0;
920 state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; 1108 state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
921 1109
922 if (!state.depth.test_enabled) 1110 if (!state.depth.test_enabled) {
923 return; 1111 return;
1112 }
924 1113
925 state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); 1114 state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
926} 1115}
927 1116
928void RasterizerOpenGL::SyncStencilTestState() { 1117void RasterizerOpenGL::SyncStencilTestState() {
929 const auto& regs = system.GPU().Maxwell3D().regs; 1118 auto& maxwell3d = system.GPU().Maxwell3D();
930 state.stencil.test_enabled = regs.stencil_enable != 0; 1119 if (!maxwell3d.dirty.stencil_test) {
1120 return;
1121 }
1122 const auto& regs = maxwell3d.regs;
931 1123
1124 state.stencil.test_enabled = regs.stencil_enable != 0;
932 if (!regs.stencil_enable) { 1125 if (!regs.stencil_enable) {
933 return; 1126 return;
934 } 1127 }
@@ -957,10 +1150,17 @@ void RasterizerOpenGL::SyncStencilTestState() {
957 state.stencil.back.action_depth_fail = GL_KEEP; 1150 state.stencil.back.action_depth_fail = GL_KEEP;
958 state.stencil.back.action_depth_pass = GL_KEEP; 1151 state.stencil.back.action_depth_pass = GL_KEEP;
959 } 1152 }
1153 state.MarkDirtyStencilState();
1154 maxwell3d.dirty.stencil_test = false;
960} 1155}
961 1156
962void RasterizerOpenGL::SyncColorMask() { 1157void RasterizerOpenGL::SyncColorMask() {
963 const auto& regs = system.GPU().Maxwell3D().regs; 1158 auto& maxwell3d = system.GPU().Maxwell3D();
1159 if (!maxwell3d.dirty.color_mask) {
1160 return;
1161 }
1162 const auto& regs = maxwell3d.regs;
1163
964 const std::size_t count = 1164 const std::size_t count =
965 regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; 1165 regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
966 for (std::size_t i = 0; i < count; i++) { 1166 for (std::size_t i = 0; i < count; i++) {
@@ -971,6 +1171,9 @@ void RasterizerOpenGL::SyncColorMask() {
971 dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; 1171 dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
972 dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; 1172 dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
973 } 1173 }
1174
1175 state.MarkDirtyColorMask();
1176 maxwell3d.dirty.color_mask = false;
974} 1177}
975 1178
976void RasterizerOpenGL::SyncMultiSampleState() { 1179void RasterizerOpenGL::SyncMultiSampleState() {
@@ -985,7 +1188,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() {
985} 1188}
986 1189
987void RasterizerOpenGL::SyncBlendState() { 1190void RasterizerOpenGL::SyncBlendState() {
988 const auto& regs = system.GPU().Maxwell3D().regs; 1191 auto& maxwell3d = system.GPU().Maxwell3D();
1192 if (!maxwell3d.dirty.blend_state) {
1193 return;
1194 }
1195 const auto& regs = maxwell3d.regs;
989 1196
990 state.blend_color.red = regs.blend_color.r; 1197 state.blend_color.red = regs.blend_color.r;
991 state.blend_color.green = regs.blend_color.g; 1198 state.blend_color.green = regs.blend_color.g;
@@ -1008,6 +1215,8 @@ void RasterizerOpenGL::SyncBlendState() {
1008 for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { 1215 for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
1009 state.blend[i].enabled = false; 1216 state.blend[i].enabled = false;
1010 } 1217 }
1218 maxwell3d.dirty.blend_state = false;
1219 state.MarkDirtyBlendState();
1011 return; 1220 return;
1012 } 1221 }
1013 1222
@@ -1024,6 +1233,9 @@ void RasterizerOpenGL::SyncBlendState() {
1024 blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); 1233 blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);
1025 blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); 1234 blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
1026 } 1235 }
1236
1237 state.MarkDirtyBlendState();
1238 maxwell3d.dirty.blend_state = false;
1027} 1239}
1028 1240
1029void RasterizerOpenGL::SyncLogicOpState() { 1241void RasterizerOpenGL::SyncLogicOpState() {
@@ -1075,13 +1287,21 @@ void RasterizerOpenGL::SyncPointState() {
1075} 1287}
1076 1288
1077void RasterizerOpenGL::SyncPolygonOffset() { 1289void RasterizerOpenGL::SyncPolygonOffset() {
1078 const auto& regs = system.GPU().Maxwell3D().regs; 1290 auto& maxwell3d = system.GPU().Maxwell3D();
1291 if (!maxwell3d.dirty.polygon_offset) {
1292 return;
1293 }
1294 const auto& regs = maxwell3d.regs;
1295
1079 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; 1296 state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
1080 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; 1297 state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
1081 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; 1298 state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
1082 state.polygon_offset.units = regs.polygon_offset_units; 1299 state.polygon_offset.units = regs.polygon_offset_units;
1083 state.polygon_offset.factor = regs.polygon_offset_factor; 1300 state.polygon_offset.factor = regs.polygon_offset_factor;
1084 state.polygon_offset.clamp = regs.polygon_offset_clamp; 1301 state.polygon_offset.clamp = regs.polygon_offset_clamp;
1302
1303 state.MarkDirtyPolygonOffset();
1304 maxwell3d.dirty.polygon_offset = false;
1085} 1305}
1086 1306
1087void RasterizerOpenGL::SyncAlphaTest() { 1307void RasterizerOpenGL::SyncAlphaTest() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index bf67e3a70..9d20a4fbf 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -24,7 +24,6 @@
24#include "video_core/renderer_opengl/gl_buffer_cache.h" 24#include "video_core/renderer_opengl/gl_buffer_cache.h"
25#include "video_core/renderer_opengl/gl_device.h" 25#include "video_core/renderer_opengl/gl_device.h"
26#include "video_core/renderer_opengl/gl_framebuffer_cache.h" 26#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
27#include "video_core/renderer_opengl/gl_global_cache.h"
28#include "video_core/renderer_opengl/gl_resource_manager.h" 27#include "video_core/renderer_opengl/gl_resource_manager.h"
29#include "video_core/renderer_opengl/gl_sampler_cache.h" 28#include "video_core/renderer_opengl/gl_sampler_cache.h"
30#include "video_core/renderer_opengl/gl_shader_cache.h" 29#include "video_core/renderer_opengl/gl_shader_cache.h"
@@ -59,10 +58,13 @@ public:
59 58
60 void DrawArrays() override; 59 void DrawArrays() override;
61 void Clear() override; 60 void Clear() override;
61 void DispatchCompute(GPUVAddr code_addr) override;
62 void FlushAll() override; 62 void FlushAll() override;
63 void FlushRegion(CacheAddr addr, u64 size) override; 63 void FlushRegion(CacheAddr addr, u64 size) override;
64 void InvalidateRegion(CacheAddr addr, u64 size) override; 64 void InvalidateRegion(CacheAddr addr, u64 size) override;
65 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 65 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
66 void FlushCommands() override;
67 void TickFrame() override;
66 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 68 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
67 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 69 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
68 const Tegra::Engines::Fermi2D::Config& copy_config) override; 70 const Tegra::Engines::Fermi2D::Config& copy_config) override;
@@ -73,11 +75,6 @@ public:
73 void LoadDiskResources(const std::atomic_bool& stop_loading, 75 void LoadDiskResources(const std::atomic_bool& stop_loading,
74 const VideoCore::DiskResourceLoadCallback& callback) override; 76 const VideoCore::DiskResourceLoadCallback& callback) override;
75 77
76 /// Maximum supported size that a constbuffer can have in bytes.
77 static constexpr std::size_t MaxConstbufferSize = 0x10000;
78 static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
79 "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
80
81private: 78private:
82 struct FramebufferConfigState { 79 struct FramebufferConfigState {
83 bool using_color_fb{}; 80 bool using_color_fb{};
@@ -98,30 +95,45 @@ private:
98 95
99 /** 96 /**
100 * Configures the color and depth framebuffer states. 97 * Configures the color and depth framebuffer states.
101 * @param must_reconfigure If true, tells the framebuffer to skip the cache and reconfigure 98 *
102 * again. Used by the texture cache to solve texception conflicts 99 * @param current_state The current OpenGL state.
103 * @param use_color_fb If true, configure color framebuffers. 100 * @param using_color_fb If true, configure color framebuffers.
104 * @param using_depth_fb If true, configure the depth/stencil framebuffer. 101 * @param using_depth_fb If true, configure the depth/stencil framebuffer.
105 * @param preserve_contents If true, tries to preserve data from a previously used framebuffer. 102 * @param preserve_contents If true, tries to preserve data from a previously used
103 * framebuffer.
106 * @param single_color_target Specifies if a single color buffer target should be used. 104 * @param single_color_target Specifies if a single color buffer target should be used.
105 *
107 * @returns If depth (first) or stencil (second) are being stored in the bound zeta texture 106 * @returns If depth (first) or stencil (second) are being stored in the bound zeta texture
108 * (requires using_depth_fb to be true) 107 * (requires using_depth_fb to be true)
109 */ 108 */
110 std::pair<bool, bool> ConfigureFramebuffers( 109 std::pair<bool, bool> ConfigureFramebuffers(
111 OpenGLState& current_state, bool use_color_fb = true, bool using_depth_fb = true, 110 OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true,
112 bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); 111 bool preserve_contents = true, std::optional<std::size_t> single_color_target = {});
113 112
113 void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
114 bool using_depth_fb, bool using_stencil_fb);
115
114 /// Configures the current constbuffers to use for the draw command. 116 /// Configures the current constbuffers to use for the draw command.
115 void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 117 void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
116 const Shader& shader); 118 const Shader& shader);
117 119
120 /// Configures the current constbuffers to use for the kernel invocation.
121 void SetupComputeConstBuffers(const Shader& kernel);
122
118 /// Configures a constant buffer. 123 /// Configures a constant buffer.
119 void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, 124 void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer,
120 const GLShader::ConstBufferEntry& entry); 125 const GLShader::ConstBufferEntry& entry);
121 126
122 /// Configures the current global memory entries to use for the draw command. 127 /// Configures the current global memory entries to use for the draw command.
123 void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 128 void SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
124 const Shader& shader); 129 const Shader& shader);
130
131 /// Configures the current global memory entries to use for the kernel invocation.
132 void SetupComputeGlobalMemory(const Shader& kernel);
133
134 /// Configures a global memory buffer.
135 void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
136 std::size_t size);
125 137
126 /// Configures the current textures to use for the draw command. Returns shaders texture buffer 138 /// Configures the current textures to use for the draw command. Returns shaders texture buffer
127 /// usage. 139 /// usage.
@@ -189,7 +201,6 @@ private:
189 201
190 TextureCacheOpenGL texture_cache; 202 TextureCacheOpenGL texture_cache;
191 ShaderCacheOpenGL shader_cache; 203 ShaderCacheOpenGL shader_cache;
192 GlobalRegionCacheOpenGL global_cache;
193 SamplerCacheOpenGL sampler_cache; 204 SamplerCacheOpenGL sampler_cache;
194 FramebufferCacheOpenGL framebuffer_cache; 205 FramebufferCacheOpenGL framebuffer_cache;
195 206
@@ -208,6 +219,7 @@ private:
208 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; 219 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
209 OGLBufferCache buffer_cache; 220 OGLBufferCache buffer_cache;
210 221
222 VertexArrayPushBuffer vertex_array_pushbuffer;
211 BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; 223 BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
212 BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; 224 BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
213 225
@@ -219,14 +231,19 @@ private:
219 GLuint SetupVertexFormat(); 231 GLuint SetupVertexFormat();
220 232
221 void SetupVertexBuffer(GLuint vao); 233 void SetupVertexBuffer(GLuint vao);
234 void SetupVertexInstances(GLuint vao);
222 235
223 DrawParameters SetupDraw(); 236 GLintptr SetupIndexBuffer();
237
238 DrawParameters SetupDraw(GLintptr index_buffer_offset);
224 239
225 void SetupShaders(GLenum primitive_mode); 240 void SetupShaders(GLenum primitive_mode);
226 241
227 enum class AccelDraw { Disabled, Arrays, Indexed }; 242 enum class AccelDraw { Disabled, Arrays, Indexed };
228 AccelDraw accelerate_draw = AccelDraw::Disabled; 243 AccelDraw accelerate_draw = AccelDraw::Disabled;
229 244
245 OGLFramebuffer clear_framebuffer;
246
230 using CachedPageMap = boost::icl::interval_map<u64, int>; 247 using CachedPageMap = boost::icl::interval_map<u64, int>;
231 CachedPageMap cached_pages; 248 CachedPageMap cached_pages;
232}; 249};
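The `cached_pages` member keeps reference counts per page range: boost::icl::interval_map aggregates overlapping +1/-1 contributions, and ranges whose count returns to zero disappear from the map. A small self-contained example of that behavior:

#include <boost/icl/interval_map.hpp>
#include <cstdint>

// Sketch: reference-counting page ranges with an interval map. Adding +1/-1 over
// a range updates counts per overlapping sub-interval; zero-count ranges vanish.
int main() {
    boost::icl::interval_map<std::uint64_t, int> pages;
    using ival = boost::icl::interval<std::uint64_t>;
    pages += std::make_pair(ival::right_open(0x1000, 0x3000), 1);  // cache region A
    pages += std::make_pair(ival::right_open(0x2000, 0x4000), 1);  // cache region B
    pages += std::make_pair(ival::right_open(0x1000, 0x3000), -1); // uncache region A
    // [0x2000, 0x4000) still counts 1: B keeps those pages marked as cached.
    return 0;
}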
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h
index defbc2d81..34ee37f00 100644
--- a/src/video_core/renderer_opengl/gl_sampler_cache.h
+++ b/src/video_core/renderer_opengl/gl_sampler_cache.h
@@ -17,9 +17,9 @@ public:
17 ~SamplerCacheOpenGL(); 17 ~SamplerCacheOpenGL();
18 18
19protected: 19protected:
20 OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const; 20 OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
21 21
22 GLuint ToSamplerType(const OGLSampler& sampler) const; 22 GLuint ToSamplerType(const OGLSampler& sampler) const override;
23}; 23};
24 24
25} // namespace OpenGL 25} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index f9b2b03a0..cf6a5cddf 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -23,13 +23,13 @@ namespace OpenGL {
23 23
24using VideoCommon::Shader::ProgramCode; 24using VideoCommon::Shader::ProgramCode;
25 25
26// One UBO is always reserved for emulation values 26// One UBO is always reserved for emulation values on staged shaders
27constexpr u32 RESERVED_UBOS = 1; 27constexpr u32 STAGE_RESERVED_UBOS = 1;
28 28
29struct UnspecializedShader { 29struct UnspecializedShader {
30 std::string code; 30 std::string code;
31 GLShader::ShaderEntries entries; 31 GLShader::ShaderEntries entries;
32 Maxwell::ShaderProgram program_type; 32 ProgramType program_type;
33}; 33};
34 34
35namespace { 35namespace {
@@ -55,15 +55,17 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g
55} 55}
56 56
57/// Gets the shader type from a Maxwell program type 57/// Gets the shader type from a Maxwell program type
58constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) { 58constexpr GLenum GetShaderType(ProgramType program_type) {
59 switch (program_type) { 59 switch (program_type) {
60 case Maxwell::ShaderProgram::VertexA: 60 case ProgramType::VertexA:
61 case Maxwell::ShaderProgram::VertexB: 61 case ProgramType::VertexB:
62 return GL_VERTEX_SHADER; 62 return GL_VERTEX_SHADER;
63 case Maxwell::ShaderProgram::Geometry: 63 case ProgramType::Geometry:
64 return GL_GEOMETRY_SHADER; 64 return GL_GEOMETRY_SHADER;
65 case Maxwell::ShaderProgram::Fragment: 65 case ProgramType::Fragment:
66 return GL_FRAGMENT_SHADER; 66 return GL_FRAGMENT_SHADER;
67 case ProgramType::Compute:
68 return GL_COMPUTE_SHADER;
67 default: 69 default:
68 return GL_NONE; 70 return GL_NONE;
69 } 71 }
@@ -100,6 +102,25 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen
100 } 102 }
101} 103}
102 104
105ProgramType GetProgramType(Maxwell::ShaderProgram program) {
106 switch (program) {
107 case Maxwell::ShaderProgram::VertexA:
108 return ProgramType::VertexA;
109 case Maxwell::ShaderProgram::VertexB:
110 return ProgramType::VertexB;
111 case Maxwell::ShaderProgram::TesselationControl:
112 return ProgramType::TessellationControl;
113 case Maxwell::ShaderProgram::TesselationEval:
114 return ProgramType::TessellationEval;
115 case Maxwell::ShaderProgram::Geometry:
116 return ProgramType::Geometry;
117 case Maxwell::ShaderProgram::Fragment:
118 return ProgramType::Fragment;
119 }
120 UNREACHABLE();
121 return {};
122}
123
103/// Calculates the size of a program stream 124/// Calculates the size of a program stream
104std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { 125std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
105 constexpr std::size_t start_offset = 10; 126 constexpr std::size_t start_offset = 10;
@@ -128,11 +149,13 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
128} 149}
129 150
130/// Hashes one (or two) program streams 151/// Hashes one (or two) program streams
131u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code, 152u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
132 const ProgramCode& code_b) { 153 const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) {
133 u64 unique_identifier = 154 if (size_a == 0) {
134 Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code)); 155 size_a = CalculateProgramSize(code);
135 if (program_type != Maxwell::ShaderProgram::VertexA) { 156 }
157 u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a);
158 if (program_type != ProgramType::VertexA) {
136 return unique_identifier; 159 return unique_identifier;
137 } 160 }
138 // VertexA programs include two programs 161 // VertexA programs include two programs
@@ -140,50 +163,69 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode&
140 std::size_t seed = 0; 163 std::size_t seed = 0;
141 boost::hash_combine(seed, unique_identifier); 164 boost::hash_combine(seed, unique_identifier);
142 165
143 const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), 166 if (size_b == 0) {
144 CalculateProgramSize(code_b)); 167 size_b = CalculateProgramSize(code_b);
168 }
169 const u64 identifier_b =
170 Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b);
145 boost::hash_combine(seed, identifier_b); 171 boost::hash_combine(seed, identifier_b);
146 return static_cast<u64>(seed); 172 return static_cast<u64>(seed);
147} 173}
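Identifiers are CityHash64 digests of the code stream; VertexA programs fold in the VertexB hash with boost::hash_combine. A sketch of the combining step, assuming the 64-bit golden-ratio constant (boost's exact combiner varies by version and platform word size):

#include <cstddef>
#include <cstdint>

// Fold two 64-bit program hashes into one identifier, mirroring the
// boost::hash_combine step above (constant is an assumption, see lead-in).
std::uint64_t CombineHashes(std::uint64_t hash_a, std::uint64_t hash_b) {
    std::size_t seed = 0;
    const auto combine = [&seed](std::uint64_t value) {
        seed ^= value + 0x9e3779b97f4a7c15ULL + (seed << 6) + (seed >> 2);
    };
    combine(hash_a);
    combine(hash_b);
    return static_cast<std::uint64_t>(seed);
}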
148 174
149/// Creates an unspecialized program from code streams 175/// Creates an unspecialized program from code streams
150GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type, 176GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type,
151 ProgramCode program_code, ProgramCode program_code_b) { 177 ProgramCode program_code, ProgramCode program_code_b) {
152 GLShader::ShaderSetup setup(program_code); 178 GLShader::ShaderSetup setup(program_code);
153 if (program_type == Maxwell::ShaderProgram::VertexA) { 179 setup.program.size_a = CalculateProgramSize(program_code);
180 setup.program.size_b = 0;
181 if (program_type == ProgramType::VertexA) {
154 // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. 182 // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
155 // Conventional HW does not support this, so we combine VertexA and VertexB into one 183 // Conventional HW does not support this, so we combine VertexA and VertexB into one
156 // stage here. 184 // stage here.
157 setup.SetProgramB(program_code_b); 185 setup.SetProgramB(program_code_b);
186 setup.program.size_b = CalculateProgramSize(program_code_b);
158 } 187 }
159 setup.program.unique_identifier = 188 setup.program.unique_identifier = GetUniqueIdentifier(
160 GetUniqueIdentifier(program_type, program_code, program_code_b); 189 program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b);
161 190
162 switch (program_type) { 191 switch (program_type) {
163 case Maxwell::ShaderProgram::VertexA: 192 case ProgramType::VertexA:
164 case Maxwell::ShaderProgram::VertexB: 193 case ProgramType::VertexB:
165 return GLShader::GenerateVertexShader(device, setup); 194 return GLShader::GenerateVertexShader(device, setup);
166 case Maxwell::ShaderProgram::Geometry: 195 case ProgramType::Geometry:
167 return GLShader::GenerateGeometryShader(device, setup); 196 return GLShader::GenerateGeometryShader(device, setup);
168 case Maxwell::ShaderProgram::Fragment: 197 case ProgramType::Fragment:
169 return GLShader::GenerateFragmentShader(device, setup); 198 return GLShader::GenerateFragmentShader(device, setup);
199 case ProgramType::Compute:
200 return GLShader::GenerateComputeShader(device, setup);
170 default: 201 default:
171 LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); 202 UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));
172 UNREACHABLE();
173 return {}; 203 return {};
174 } 204 }
175} 205}
176 206
177CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, 207CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
178 Maxwell::ShaderProgram program_type, const ProgramVariant& variant, 208 ProgramType program_type, const ProgramVariant& variant,
179 bool hint_retrievable = false) { 209 bool hint_retrievable = false) {
180 auto base_bindings{variant.base_bindings}; 210 auto base_bindings{variant.base_bindings};
181 const auto primitive_mode{variant.primitive_mode}; 211 const auto primitive_mode{variant.primitive_mode};
182 const auto texture_buffer_usage{variant.texture_buffer_usage}; 212 const auto texture_buffer_usage{variant.texture_buffer_usage};
183 213
184 std::string source = "#version 430 core\n" 214 std::string source = "#version 430 core\n"
185 "#extension GL_ARB_separate_shader_objects : enable\n\n"; 215 "#extension GL_ARB_separate_shader_objects : enable\n"
186 source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); 216 "#extension GL_NV_gpu_shader5 : enable\n"
217 "#extension GL_NV_shader_thread_group : enable\n";
218 if (entries.shader_viewport_layer_array) {
219 source += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
220 }
221 if (program_type == ProgramType::Compute) {
222 source += "#extension GL_ARB_compute_variable_group_size : require\n";
223 }
224 source += '\n';
225
226 if (program_type != ProgramType::Compute) {
227 source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
228 }
187 229
188 for (const auto& cbuf : entries.const_buffers) { 230 for (const auto& cbuf : entries.const_buffers) {
189 source += 231 source +=
@@ -207,17 +249,24 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
207 if (!texture_buffer_usage.test(i)) { 249 if (!texture_buffer_usage.test(i)) {
208 continue; 250 continue;
209 } 251 }
210 source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i); 252 source += fmt::format("#define SAMPLER_{}_IS_BUFFER\n", i);
253 }
254 if (texture_buffer_usage.any()) {
255 source += '\n';
211 } 256 }
212 257
213 if (program_type == Maxwell::ShaderProgram::Geometry) { 258 if (program_type == ProgramType::Geometry) {
214 const auto [glsl_topology, debug_name, max_vertices] = 259 const auto [glsl_topology, debug_name, max_vertices] =
215 GetPrimitiveDescription(primitive_mode); 260 GetPrimitiveDescription(primitive_mode);
216 261
217 source += "layout (" + std::string(glsl_topology) + ") in;\n"; 262 source += "layout (" + std::string(glsl_topology) + ") in;\n\n";
218 source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; 263 source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
219 } 264 }
265 if (program_type == ProgramType::Compute) {
266 source += "layout (local_size_variable) in;\n";
267 }
220 268
269 source += '\n';
221 source += code; 270 source += code;
222 271
223 OGLShader shader; 272 OGLShader shader;
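Put together, for a compute kernel the assembled source now begins with a preamble roughly like the string below. This is illustrative only; the per-constbuffer #defines emitted from entries.const_buffers are omitted, and the extension list depends on the shader's entries.

// Illustrative result of the preamble assembly above for a compute kernel.
constexpr const char* example_compute_preamble = R"(#version 430 core
#extension GL_ARB_separate_shader_objects : enable
#extension GL_NV_gpu_shader5 : enable
#extension GL_NV_shader_thread_group : enable
#extension GL_ARB_compute_variable_group_size : require

layout (local_size_variable) in;
)";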
@@ -244,9 +293,9 @@ std::set<GLenum> GetSupportedFormats() {
244 293
245} // Anonymous namespace 294} // Anonymous namespace
246 295
247CachedShader::CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, 296CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type,
248 GLShader::ProgramResult result) 297 GLShader::ProgramResult result)
249 : RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr}, 298 : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr},
250 unique_identifier{params.unique_identifier}, program_type{program_type}, 299 unique_identifier{params.unique_identifier}, program_type{program_type},
251 disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs}, 300 disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs},
252 entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {} 301 entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {}
@@ -257,29 +306,50 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
257 ProgramCode&& program_code_b) { 306 ProgramCode&& program_code_b) {
258 const auto code_size{CalculateProgramSize(program_code)}; 307 const auto code_size{CalculateProgramSize(program_code)};
259 const auto code_size_b{CalculateProgramSize(program_code_b)}; 308 const auto code_size_b{CalculateProgramSize(program_code_b)};
260 auto result{CreateProgram(params.device, program_type, program_code, program_code_b)}; 309 auto result{
310 CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)};
261 if (result.first.empty()) { 311 if (result.first.empty()) {
262 // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now 312 // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
263 return {}; 313 return {};
264 } 314 }
265 315
266 params.disk_cache.SaveRaw(ShaderDiskCacheRaw( 316 params.disk_cache.SaveRaw(ShaderDiskCacheRaw(
267 params.unique_identifier, program_type, static_cast<u32>(code_size / sizeof(u64)), 317 params.unique_identifier, GetProgramType(program_type),
268 static_cast<u32>(code_size_b / sizeof(u64)), std::move(program_code), 318 static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)),
269 std::move(program_code_b))); 319 std::move(program_code), std::move(program_code_b)));
270 320
271 return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); 321 return std::shared_ptr<CachedShader>(
322 new CachedShader(params, GetProgramType(program_type), std::move(result)));
272} 323}
273 324
274Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, 325Shader CachedShader::CreateStageFromCache(const ShaderParameters& params,
275 Maxwell::ShaderProgram program_type, 326 Maxwell::ShaderProgram program_type,
276 GLShader::ProgramResult result) { 327 GLShader::ProgramResult result) {
277 return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); 328 return std::shared_ptr<CachedShader>(
329 new CachedShader(params, GetProgramType(program_type), std::move(result)));
330}
331
332Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) {
333 auto result{CreateProgram(params.device, ProgramType::Compute, code, {})};
334
335 const auto code_size{CalculateProgramSize(code)};
336 params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute,
337 static_cast<u32>(code_size / sizeof(u64)), 0,
338 std::move(code), {}));
339
340 return std::shared_ptr<CachedShader>(
341 new CachedShader(params, ProgramType::Compute, std::move(result)));
342}
343
344Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params,
345 GLShader::ProgramResult result) {
346 return std::shared_ptr<CachedShader>(
347 new CachedShader(params, ProgramType::Compute, std::move(result)));
278} 348}
279 349
280std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { 350std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
281 GLuint handle{}; 351 GLuint handle{};
282 if (program_type == Maxwell::ShaderProgram::Geometry) { 352 if (program_type == ProgramType::Geometry) {
283 handle = GetGeometryShader(variant); 353 handle = GetGeometryShader(variant);
284 } else { 354 } else {
285 const auto [entry, is_cache_miss] = programs.try_emplace(variant); 355 const auto [entry, is_cache_miss] = programs.try_emplace(variant);
@@ -297,8 +367,11 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar
297 handle = program->handle; 367 handle = program->handle;
298 } 368 }
299 369
300 auto base_bindings{variant.base_bindings}; 370 auto base_bindings = variant.base_bindings;
301 base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS; 371 base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size());
372 if (program_type != ProgramType::Compute) {
373 base_bindings.cbuf += STAGE_RESERVED_UBOS;
374 }
302 base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); 375 base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
303 base_bindings.sampler += static_cast<u32>(entries.samplers.size()); 376 base_bindings.sampler += static_cast<u32>(entries.samplers.size());
304 377
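
The hunk above makes the constant-buffer binding offset stage-aware: graphics stages still reserve slots for the emulation UBO, while compute programs start right after their own const buffers. A minimal sketch of that accumulation, with BaseBindings and STAGE_RESERVED_UBOS reduced to hypothetical local definitions:

// Sketch of the binding accumulation; the types and the reserved count are assumptions.
struct BaseBindings {
    unsigned cbuf = 0;
    unsigned gmem = 0;
    unsigned sampler = 0;
};

constexpr unsigned STAGE_RESERVED_UBOS = 1; // placeholder; the real constant lives elsewhere

BaseBindings NextBindings(BaseBindings base, unsigned num_cbufs, unsigned num_gmems,
                          unsigned num_samplers, bool is_compute) {
    base.cbuf += num_cbufs;
    if (!is_compute) {
        base.cbuf += STAGE_RESERVED_UBOS; // only graphics stages carry the emulation UBO
    }
    base.gmem += num_gmems;
    base.sampler += num_samplers;
    return base;
}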
@@ -561,7 +634,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
561} 634}
562 635
563Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { 636Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
564 if (!system.GPU().Maxwell3D().dirty_flags.shaders) { 637 if (!system.GPU().Maxwell3D().dirty.shaders) {
565 return last_shaders[static_cast<std::size_t>(program)]; 638 return last_shaders[static_cast<std::size_t>(program)];
566 } 639 }
567 640
@@ -578,13 +651,15 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
578 // No shader found - create a new one 651 // No shader found - create a new one
579 ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; 652 ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
580 ProgramCode program_code_b; 653 ProgramCode program_code_b;
581 if (program == Maxwell::ShaderProgram::VertexA) { 654 const bool is_program_a{program == Maxwell::ShaderProgram::VertexA};
655 if (is_program_a) {
582 const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; 656 const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
583 program_code_b = GetShaderCode(memory_manager, program_addr_b, 657 program_code_b = GetShaderCode(memory_manager, program_addr_b,
584 memory_manager.GetPointer(program_addr_b)); 658 memory_manager.GetPointer(program_addr_b));
585 } 659 }
586 660
587 const auto unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); 661 const auto unique_identifier =
662 GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b);
588 const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; 663 const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
589 const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, 664 const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
590 host_ptr, unique_identifier}; 665 host_ptr, unique_identifier};
@@ -601,4 +676,30 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
601 return last_shaders[static_cast<std::size_t>(program)] = shader; 676 return last_shaders[static_cast<std::size_t>(program)] = shader;
602} 677}
603 678
679Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
680 auto& memory_manager{system.GPU().MemoryManager()};
681 const auto host_ptr{memory_manager.GetPointer(code_addr)};
682 auto kernel = TryGet(host_ptr);
683 if (kernel) {
684 return kernel;
685 }
686
687 // No kernel found - create a new one
688 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
689 const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})};
690 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
691 const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
692 host_ptr, unique_identifier};
693
694 const auto found = precompiled_shaders.find(unique_identifier);
695 if (found == precompiled_shaders.end()) {
696 kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
697 } else {
698 kernel = CachedShader::CreateKernelFromCache(params, found->second);
699 }
700
701 Register(kernel);
702 return kernel;
703}
704
604} // namespace OpenGL 705} // namespace OpenGL
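
GetComputeKernel mirrors GetStageProgram: look the kernel up by host pointer, fall back to the precompiled map keyed by unique identifier, build from guest memory as a last resort, then register the result. A condensed sketch of that flow, with Kernel and the two maps as hypothetical stand-ins for the real cache types:

#include <memory>
#include <unordered_map>

using Kernel = std::shared_ptr<int>; // stand-in for the real Shader type

struct KernelCache {
    std::unordered_map<const unsigned char*, Kernel> by_host_ptr; // TryGet/Register
    std::unordered_map<unsigned long long, Kernel> precompiled;   // precompiled_shaders

    Kernel Get(const unsigned char* host_ptr, unsigned long long unique_identifier) {
        if (const auto it = by_host_ptr.find(host_ptr); it != by_host_ptr.end()) {
            return it->second; // fast path: already cached
        }
        Kernel kernel;
        if (const auto found = precompiled.find(unique_identifier);
            found != precompiled.end()) {
            kernel = found->second;            // CreateKernelFromCache path
        } else {
            kernel = std::make_shared<int>(0); // placeholder for CreateKernelFromMemory
        }
        by_host_ptr.emplace(host_ptr, kernel); // equivalent of Register(kernel)
        return kernel;
    }
};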
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index bbb53cdf4..2c8faf855 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -61,6 +61,11 @@ public:
61 Maxwell::ShaderProgram program_type, 61 Maxwell::ShaderProgram program_type,
62 GLShader::ProgramResult result); 62 GLShader::ProgramResult result);
63 63
64 static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code);
65
66 static Shader CreateKernelFromCache(const ShaderParameters& params,
67 GLShader::ProgramResult result);
68
64 VAddr GetCpuAddr() const override { 69 VAddr GetCpuAddr() const override {
65 return cpu_addr; 70 return cpu_addr;
66 } 71 }
@@ -78,7 +83,7 @@ public:
78 std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); 83 std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);
79 84
80private: 85private:
81 explicit CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, 86 explicit CachedShader(const ShaderParameters& params, ProgramType program_type,
82 GLShader::ProgramResult result); 87 GLShader::ProgramResult result);
83 88
84 // Geometry programs. These are needed because GLSL needs an input topology but it's not 89 // Geometry programs. These are needed because GLSL needs an input topology but it's not
@@ -101,10 +106,9 @@ private:
101 106
102 ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; 107 ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const;
103 108
104 u8* host_ptr{};
105 VAddr cpu_addr{}; 109 VAddr cpu_addr{};
106 u64 unique_identifier{}; 110 u64 unique_identifier{};
107 Maxwell::ShaderProgram program_type{}; 111 ProgramType program_type{};
108 ShaderDiskCacheOpenGL& disk_cache; 112 ShaderDiskCacheOpenGL& disk_cache;
109 const PrecompiledPrograms& precompiled_programs; 113 const PrecompiledPrograms& precompiled_programs;
110 114
@@ -132,6 +136,9 @@ public:
132 /// Gets the current specified shader stage program 136 /// Gets the current specified shader stage program
133 Shader GetStageProgram(Maxwell::ShaderProgram program); 137 Shader GetStageProgram(Maxwell::ShaderProgram program);
134 138
139 /// Gets a compute kernel at the passed address
140 Shader GetComputeKernel(GPUVAddr code_addr);
141
135protected: 142protected:
136 // We do not have to flush this cache as things in it are never modified by us. 143 // We do not have to flush this cache as things in it are never modified by us.
137 void FlushObjectInner(const Shader& object) override {} 144 void FlushObjectInner(const Shader& object) override {}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 5f2f1510c..359d58cbe 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -14,6 +14,7 @@
14#include "common/alignment.h" 14#include "common/alignment.h"
15#include "common/assert.h" 15#include "common/assert.h"
16#include "common/common_types.h" 16#include "common/common_types.h"
17#include "common/logging/log.h"
17#include "video_core/engines/maxwell_3d.h" 18#include "video_core/engines/maxwell_3d.h"
18#include "video_core/renderer_opengl/gl_device.h" 19#include "video_core/renderer_opengl/gl_device.h"
19#include "video_core/renderer_opengl/gl_rasterizer.h" 20#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -36,7 +37,6 @@ using namespace std::string_literals;
36using namespace VideoCommon::Shader; 37using namespace VideoCommon::Shader;
37 38
38using Maxwell = Tegra::Engines::Maxwell3D::Regs; 39using Maxwell = Tegra::Engines::Maxwell3D::Regs;
39using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
40using Operation = const OperationNode&; 40using Operation = const OperationNode&;
41 41
42enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; 42enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
@@ -46,7 +46,7 @@ using TextureArgument = std::pair<Type, Node>;
46using TextureIR = std::variant<TextureAoffi, TextureArgument>; 46using TextureIR = std::variant<TextureAoffi, TextureArgument>;
47 47
48constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 48constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
49 static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); 49 static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
50 50
51class ShaderWriter { 51class ShaderWriter {
52public: 52public:
@@ -161,9 +161,13 @@ std::string FlowStackTopName(MetaStackClass stack) {
161 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); 161 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
162} 162}
163 163
164constexpr bool IsVertexShader(ProgramType stage) {
165 return stage == ProgramType::VertexA || stage == ProgramType::VertexB;
166}
167
164class GLSLDecompiler final { 168class GLSLDecompiler final {
165public: 169public:
166 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, 170 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage,
167 std::string suffix) 171 std::string suffix)
168 : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} 172 : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
169 173
@@ -191,10 +195,12 @@ public:
191 195
192 // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems 196 // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
193 // unlikely that shaders will use 20 nested SSYs and PBKs. 197 // unlikely that shaders will use 20 nested SSYs and PBKs.
194 constexpr u32 FLOW_STACK_SIZE = 20; 198 if (!ir.IsFlowStackDisabled()) {
195 for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { 199 constexpr u32 FLOW_STACK_SIZE = 20;
196 code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); 200 for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
197 code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); 201 code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
202 code.AddLine("uint {} = 0u;", FlowStackTopName(stack));
203 }
198 } 204 }
199 205
200 code.AddLine("while (true) {{"); 206 code.AddLine("while (true) {{");
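
Guarding the declarations behind ir.IsFlowStackDisabled() means shaders whose control flow was fully resolved no longer pay for the SSY/PBK emulation arrays. When the stack is still needed, the writer emits roughly the following GLSL per stack class (the prefix is an assumption based on GetFlowStackPrefix; the real names are also suffixed per shader):

// Approximate GLSL emitted for one stack class when the flow stack is enabled.
constexpr const char* kSsyStackDecl = R"(
uint ssy_flow_stack[20];
uint ssy_flow_stack_top = 0u;
)";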
@@ -244,24 +250,22 @@ public:
244 usage.is_read, usage.is_written); 250 usage.is_read, usage.is_written);
245 } 251 }
246 entries.clip_distances = ir.GetClipDistances(); 252 entries.clip_distances = ir.GetClipDistances();
253 entries.shader_viewport_layer_array =
254 IsVertexShader(stage) && (ir.UsesLayer() || ir.UsesViewportIndex());
247 entries.shader_length = ir.GetLength(); 255 entries.shader_length = ir.GetLength();
248 return entries; 256 return entries;
249 } 257 }
250 258
251private: 259private:
252 using OperationDecompilerFn = std::string (GLSLDecompiler::*)(Operation);
253 using OperationDecompilersArray =
254 std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
255
256 void DeclareVertex() { 260 void DeclareVertex() {
257 if (stage != ShaderStage::Vertex) 261 if (!IsVertexShader(stage))
258 return; 262 return;
259 263
260 DeclareVertexRedeclarations(); 264 DeclareVertexRedeclarations();
261 } 265 }
262 266
263 void DeclareGeometry() { 267 void DeclareGeometry() {
264 if (stage != ShaderStage::Geometry) { 268 if (stage != ProgramType::Geometry) {
265 return; 269 return;
266 } 270 }
267 271
@@ -280,22 +284,35 @@ private:
280 } 284 }
281 285
282 void DeclareVertexRedeclarations() { 286 void DeclareVertexRedeclarations() {
283 bool clip_distances_declared = false;
284
285 code.AddLine("out gl_PerVertex {{"); 287 code.AddLine("out gl_PerVertex {{");
286 ++code.scope; 288 ++code.scope;
287 289
288 code.AddLine("vec4 gl_Position;"); 290 code.AddLine("vec4 gl_Position;");
289 291
290 for (const auto o : ir.GetOutputAttributes()) { 292 for (const auto attribute : ir.GetOutputAttributes()) {
291 if (o == Attribute::Index::PointSize) 293 if (attribute == Attribute::Index::ClipDistances0123 ||
292 code.AddLine("float gl_PointSize;"); 294 attribute == Attribute::Index::ClipDistances4567) {
293 if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 ||
294 o == Attribute::Index::ClipDistances4567)) {
295 code.AddLine("float gl_ClipDistance[];"); 295 code.AddLine("float gl_ClipDistance[];");
296 clip_distances_declared = true; 296 break;
297 } 297 }
298 } 298 }
299 if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) {
300 if (ir.UsesLayer()) {
301 code.AddLine("int gl_Layer;");
302 }
303 if (ir.UsesViewportIndex()) {
304 code.AddLine("int gl_ViewportIndex;");
305 }
306 } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) &&
307 !device.HasVertexViewportLayer()) {
308 LOG_ERROR(
309 Render_OpenGL,
310 "GL_ARB_shader_viewport_layer_array is not available and its required by a shader");
311 }
312
313 if (ir.UsesPointSize()) {
314 code.AddLine("float gl_PointSize;");
315 }
299 316
300 --code.scope; 317 --code.scope;
301 code.AddLine("}};"); 318 code.AddLine("}};");
@@ -323,11 +340,16 @@ private:
323 } 340 }
324 341
325 void DeclareLocalMemory() { 342 void DeclareLocalMemory() {
326 if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) { 343 // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
327 const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; 344 // specialization time.
328 code.AddLine("float {}[{}];", GetLocalMemory(), element_count); 345 const u64 local_memory_size =
329 code.AddNewLine(); 346 stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize();
347 if (local_memory_size == 0) {
348 return;
330 } 349 }
350 const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
351 code.AddLine("float {}[{}];", GetLocalMemory(), element_count);
352 code.AddNewLine();
331 } 353 }
332 354
333 void DeclareInternalFlags() { 355 void DeclareInternalFlags() {
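
Local memory is declared as a float array sized in four-byte elements; compute stages use a stubbed 0x400 bytes until the true size can be specialized. The arithmetic, assuming Common::AlignUp has the usual round-up-to-a-multiple semantics:

#include <cstdint>

// Assumed behaviour of Common::AlignUp: round value up to a multiple of align.
constexpr std::uint64_t AlignUp(std::uint64_t value, std::uint64_t align) {
    return (value + align - 1) / align * align;
}

constexpr std::uint64_t ElementCount(std::uint64_t local_memory_bytes) {
    return AlignUp(local_memory_bytes, 4) / 4; // one float per four bytes
}

static_assert(ElementCount(0x400) == 256); // the compute stub declares a 256-element array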
@@ -381,12 +403,12 @@ private:
381 const u32 location{GetGenericAttributeIndex(index)}; 403 const u32 location{GetGenericAttributeIndex(index)};
382 404
383 std::string name{GetInputAttribute(index)}; 405 std::string name{GetInputAttribute(index)};
384 if (stage == ShaderStage::Geometry) { 406 if (stage == ProgramType::Geometry) {
385 name = "gs_" + name + "[]"; 407 name = "gs_" + name + "[]";
386 } 408 }
387 409
388 std::string suffix; 410 std::string suffix;
389 if (stage == ShaderStage::Fragment) { 411 if (stage == ProgramType::Fragment) {
390 const auto input_mode{header.ps.GetAttributeUse(location)}; 412 const auto input_mode{header.ps.GetAttributeUse(location)};
391 if (skip_unused && input_mode == AttributeUse::Unused) { 413 if (skip_unused && input_mode == AttributeUse::Unused) {
392 return; 414 return;
@@ -398,7 +420,7 @@ private:
398 } 420 }
399 421
400 void DeclareOutputAttributes() { 422 void DeclareOutputAttributes() {
401 if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) { 423 if (ir.HasPhysicalAttributes() && stage != ProgramType::Fragment) {
402 for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { 424 for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) {
403 DeclareOutputAttribute(ToGenericAttribute(i)); 425 DeclareOutputAttribute(ToGenericAttribute(i));
404 } 426 }
@@ -520,7 +542,7 @@ private:
520 constexpr u32 element_stride{4}; 542 constexpr u32 element_stride{4};
521 const u32 address{generic_base + index * generic_stride + element * element_stride}; 543 const u32 address{generic_base + index * generic_stride + element * element_stride};
522 544
523 const bool declared{stage != ShaderStage::Fragment || 545 const bool declared{stage != ProgramType::Fragment ||
524 header.ps.GetAttributeUse(index) != AttributeUse::Unused}; 546 header.ps.GetAttributeUse(index) != AttributeUse::Unused};
525 const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; 547 const std::string value{declared ? ReadAttribute(attribute, element) : "0"};
526 code.AddLine("case 0x{:x}: return {};", address, value); 548 code.AddLine("case 0x{:x}: return {};", address, value);
@@ -543,7 +565,7 @@ private:
543 case Tegra::Shader::ImageType::Texture1D: 565 case Tegra::Shader::ImageType::Texture1D:
544 return "image1D"; 566 return "image1D";
545 case Tegra::Shader::ImageType::TextureBuffer: 567 case Tegra::Shader::ImageType::TextureBuffer:
546 return "bufferImage"; 568 return "imageBuffer";
547 case Tegra::Shader::ImageType::Texture1DArray: 569 case Tegra::Shader::ImageType::Texture1DArray:
548 return "image1DArray"; 570 return "image1DArray";
549 case Tegra::Shader::ImageType::Texture2D: 571 case Tegra::Shader::ImageType::Texture2D:
@@ -624,7 +646,7 @@ private:
624 } 646 }
625 647
626 if (const auto abuf = std::get_if<AbufNode>(&*node)) { 648 if (const auto abuf = std::get_if<AbufNode>(&*node)) {
627 UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, 649 UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry,
628 "Physical attributes in geometry shaders are not implemented"); 650 "Physical attributes in geometry shaders are not implemented");
629 if (abuf->IsPhysicalBuffer()) { 651 if (abuf->IsPhysicalBuffer()) {
630 return fmt::format("readPhysicalAttribute(ftou({}))", 652 return fmt::format("readPhysicalAttribute(ftou({}))",
@@ -679,6 +701,9 @@ private:
679 } 701 }
680 702
681 if (const auto lmem = std::get_if<LmemNode>(&*node)) { 703 if (const auto lmem = std::get_if<LmemNode>(&*node)) {
704 if (stage == ProgramType::Compute) {
705 LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
706 }
682 return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); 707 return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
683 } 708 }
684 709
@@ -708,7 +733,7 @@ private:
708 733
709 std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { 734 std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
710 const auto GeometryPass = [&](std::string_view name) { 735 const auto GeometryPass = [&](std::string_view name) {
711 if (stage == ShaderStage::Geometry && buffer) { 736 if (stage == ProgramType::Geometry && buffer) {
712 // TODO(Rodrigo): Guard geometry inputs against out-of-bounds reads. Some games 737 // TODO(Rodrigo): Guard geometry inputs against out-of-bounds reads. Some games
713 // set a 0x80000000 index for those and the shader fails to build. Find out why 738 // set a 0x80000000 index for those and the shader fails to build. Find out why
714 // this happens and what its intent is. 739 // this happens and what its intent is.
@@ -720,10 +745,10 @@ private:
720 switch (attribute) { 745 switch (attribute) {
721 case Attribute::Index::Position: 746 case Attribute::Index::Position:
722 switch (stage) { 747 switch (stage) {
723 case ShaderStage::Geometry: 748 case ProgramType::Geometry:
724 return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), 749 return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer),
725 GetSwizzle(element)); 750 GetSwizzle(element));
726 case ShaderStage::Fragment: 751 case ProgramType::Fragment:
727 return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); 752 return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element));
728 default: 753 default:
729 UNREACHABLE(); 754 UNREACHABLE();
@@ -744,7 +769,7 @@ private:
744 // TODO(Subv): Find out what the values are for the first two elements when inside a 769 // TODO(Subv): Find out what the values are for the first two elements when inside a
745 // vertex shader, and what's the value of the fourth element when inside a Tess Eval 770 // vertex shader, and what's the value of the fourth element when inside a Tess Eval
746 // shader. 771 // shader.
747 ASSERT(stage == ShaderStage::Vertex); 772 ASSERT(IsVertexShader(stage));
748 switch (element) { 773 switch (element) {
749 case 2: 774 case 2:
750 // Config pack's first value is instance_id. 775 // Config pack's first value is instance_id.
@@ -756,7 +781,7 @@ private:
756 return "0"; 781 return "0";
757 case Attribute::Index::FrontFacing: 782 case Attribute::Index::FrontFacing:
758 // TODO(Subv): Find out what the values are for the other elements. 783 // TODO(Subv): Find out what the values are for the other elements.
759 ASSERT(stage == ShaderStage::Fragment); 784 ASSERT(stage == ProgramType::Fragment);
760 switch (element) { 785 switch (element) {
761 case 3: 786 case 3:
762 return "itof(gl_FrontFacing ? -1 : 0)"; 787 return "itof(gl_FrontFacing ? -1 : 0)";
@@ -778,7 +803,7 @@ private:
778 return value; 803 return value;
779 } 804 }
780 // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders 805 // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders
781 const std::string precise = stage != ShaderStage::Fragment ? "precise " : ""; 806 const std::string precise = stage != ProgramType::Fragment ? "precise " : "";
782 807
783 const std::string temporary = code.GenerateTemporary(); 808 const std::string temporary = code.GenerateTemporary();
784 code.AddLine("{}float {} = {};", precise, temporary, value); 809 code.AddLine("{}float {} = {};", precise, temporary, value);
@@ -803,6 +828,45 @@ private:
803 return CastOperand(VisitOperand(operation, operand_index), type); 828 return CastOperand(VisitOperand(operation, operand_index), type);
804 } 829 }
805 830
831 std::optional<std::pair<std::string, bool>> GetOutputAttribute(const AbufNode* abuf) {
832 switch (const auto attribute = abuf->GetIndex()) {
833 case Attribute::Index::Position:
834 return std::make_pair("gl_Position"s + GetSwizzle(abuf->GetElement()), false);
835 case Attribute::Index::LayerViewportPointSize:
836 switch (abuf->GetElement()) {
837 case 0:
838 UNIMPLEMENTED();
839 return {};
840 case 1:
841 if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
842 return {};
843 }
844 return std::make_pair("gl_Layer", true);
845 case 2:
846 if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
847 return {};
848 }
849 return std::make_pair("gl_ViewportIndex", true);
850 case 3:
851 UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in the shader");
852 return std::make_pair("gl_PointSize", false);
853 }
854 return {};
855 case Attribute::Index::ClipDistances0123:
856 return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), false);
857 case Attribute::Index::ClipDistances4567:
858 return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4),
859 false);
860 default:
861 if (IsGenericAttribute(attribute)) {
862 return std::make_pair(
863 GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), false);
864 }
865 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
866 return {};
867 }
868 }
869
806 std::string CastOperand(const std::string& value, Type type) const { 870 std::string CastOperand(const std::string& value, Type type) const {
807 switch (type) { 871 switch (type) {
808 case Type::Bool: 872 case Type::Bool:
@@ -999,6 +1063,8 @@ private:
999 const Node& src = operation[1]; 1063 const Node& src = operation[1];
1000 1064
1001 std::string target; 1065 std::string target;
1066 bool is_integer = false;
1067
1002 if (const auto gpr = std::get_if<GprNode>(&*dest)) { 1068 if (const auto gpr = std::get_if<GprNode>(&*dest)) {
1003 if (gpr->GetIndex() == Register::ZeroIndex) { 1069 if (gpr->GetIndex() == Register::ZeroIndex) {
1004 // Writing to Register::ZeroIndex is a no op 1070 // Writing to Register::ZeroIndex is a no op
@@ -1007,27 +1073,16 @@ private:
1007 target = GetRegister(gpr->GetIndex()); 1073 target = GetRegister(gpr->GetIndex());
1008 } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { 1074 } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
1009 UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); 1075 UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
1010 1076 const auto result = GetOutputAttribute(abuf);
1011 target = [&]() -> std::string { 1077 if (!result) {
1012 switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) { 1078 return {};
1013 case Attribute::Index::Position: 1079 }
1014 return "gl_Position"s + GetSwizzle(abuf->GetElement()); 1080 target = result->first;
1015 case Attribute::Index::PointSize: 1081 is_integer = result->second;
1016 return "gl_PointSize";
1017 case Attribute::Index::ClipDistances0123:
1018 return fmt::format("gl_ClipDistance[{}]", abuf->GetElement());
1019 case Attribute::Index::ClipDistances4567:
1020 return fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4);
1021 default:
1022 if (IsGenericAttribute(attribute)) {
1023 return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement());
1024 }
1025 UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
1026 static_cast<u32>(attribute));
1027 return "0";
1028 }
1029 }();
1030 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { 1082 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
1083 if (stage == ProgramType::Compute) {
1084 LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
1085 }
1031 target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); 1086 target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
1032 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { 1087 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1033 const std::string real = Visit(gmem->GetRealAddress()); 1088 const std::string real = Visit(gmem->GetRealAddress());
@@ -1038,7 +1093,11 @@ private:
1038 UNREACHABLE_MSG("Assign called without a proper target"); 1093 UNREACHABLE_MSG("Assign called without a proper target");
1039 } 1094 }
1040 1095
1041 code.AddLine("{} = {};", target, Visit(src)); 1096 if (is_integer) {
1097 code.AddLine("{} = ftoi({});", target, Visit(src));
1098 } else {
1099 code.AddLine("{} = {};", target, Visit(src));
1100 }
1042 return {}; 1101 return {};
1043 } 1102 }
1044 1103
@@ -1077,6 +1136,16 @@ private:
1077 Type::Float); 1136 Type::Float);
1078 } 1137 }
1079 1138
1139 std::string FCastHalf0(Operation operation) {
1140 const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat);
1141 return fmt::format("({})[0]", op_a);
1142 }
1143
1144 std::string FCastHalf1(Operation operation) {
1145 const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat);
1146 return fmt::format("({})[1]", op_a);
1147 }
1148
1080 template <Type type> 1149 template <Type type>
1081 std::string Min(Operation operation) { 1150 std::string Min(Operation operation) {
1082 return GenerateBinaryCall(operation, "min", type, type, type); 1151 return GenerateBinaryCall(operation, "min", type, type, type);
@@ -1233,6 +1302,11 @@ private:
1233 return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); 1302 return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat));
1234 } 1303 }
1235 1304
1305 std::string HCastFloat(Operation operation) {
1306 const std::string op_a = VisitOperand(operation, 0, Type::Float);
1307 return fmt::format("fromHalf2(vec2({}, 0.0f))", op_a);
1308 }
1309
1236 std::string HUnpack(Operation operation) { 1310 std::string HUnpack(Operation operation) {
1237 const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)}; 1311 const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)};
1238 const auto value = [&]() -> std::string { 1312 const auto value = [&]() -> std::string {
@@ -1351,14 +1425,10 @@ private:
1351 return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint)); 1425 return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint));
1352 } 1426 }
1353 1427
1354 std::string LogicalAll2(Operation operation) { 1428 std::string LogicalAnd2(Operation operation) {
1355 return GenerateUnary(operation, "all", Type::Bool, Type::Bool2); 1429 return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
1356 } 1430 }
1357 1431
1358 std::string LogicalAny2(Operation operation) {
1359 return GenerateUnary(operation, "any", Type::Bool, Type::Bool2);
1360 }
1361
1362 template <bool with_nan> 1432 template <bool with_nan>
1363 std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) { 1433 std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) {
1364 const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2, 1434 const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
@@ -1555,6 +1625,14 @@ private:
1555 return {}; 1625 return {};
1556 } 1626 }
1557 1627
1628 std::string BranchIndirect(Operation operation) {
1629 const std::string op_a = VisitOperand(operation, 0, Type::Uint);
1630
1631 code.AddLine("jmp_to = {};", op_a);
1632 code.AddLine("break;");
1633 return {};
1634 }
1635
1558 std::string PushFlowStack(Operation operation) { 1636 std::string PushFlowStack(Operation operation) {
1559 const auto stack = std::get<MetaStackClass>(operation.GetMeta()); 1637 const auto stack = std::get<MetaStackClass>(operation.GetMeta());
1560 const auto target = std::get_if<ImmediateNode>(&*operation[0]); 1638 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
@@ -1573,7 +1651,7 @@ private:
1573 } 1651 }
1574 1652
1575 std::string Exit(Operation operation) { 1653 std::string Exit(Operation operation) {
1576 if (stage != ShaderStage::Fragment) { 1654 if (stage != ProgramType::Fragment) {
1577 code.AddLine("return;"); 1655 code.AddLine("return;");
1578 return {}; 1656 return {};
1579 } 1657 }
@@ -1624,7 +1702,7 @@ private:
1624 } 1702 }
1625 1703
1626 std::string EmitVertex(Operation operation) { 1704 std::string EmitVertex(Operation operation) {
1627 ASSERT_MSG(stage == ShaderStage::Geometry, 1705 ASSERT_MSG(stage == ProgramType::Geometry,
1628 "EmitVertex is expected to be used in a geometry shader."); 1706 "EmitVertex is expected to be used in a geometry shader.");
1629 1707
1630 // If a geometry shader is attached, it will always flip (it's the last stage before 1708 // If a geometry shader is attached, it will always flip (it's the last stage before
@@ -1635,7 +1713,7 @@ private:
1635 } 1713 }
1636 1714
1637 std::string EndPrimitive(Operation operation) { 1715 std::string EndPrimitive(Operation operation) {
1638 ASSERT_MSG(stage == ShaderStage::Geometry, 1716 ASSERT_MSG(stage == ProgramType::Geometry,
1639 "EndPrimitive is expected to be used in a geometry shader."); 1717 "EndPrimitive is expected to be used in a geometry shader.");
1640 1718
1641 code.AddLine("EndPrimitive();"); 1719 code.AddLine("EndPrimitive();");
@@ -1657,7 +1735,49 @@ private:
1657 return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')'; 1735 return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')';
1658 } 1736 }
1659 1737
1660 static constexpr OperationDecompilersArray operation_decompilers = { 1738 std::string BallotThread(Operation operation) {
1739 const std::string value = VisitOperand(operation, 0, Type::Bool);
1740 if (!device.HasWarpIntrinsics()) {
1741 LOG_ERROR(Render_OpenGL,
1742 "Nvidia warp intrinsics are not available and its required by a shader");
1743 // Stub on non-Nvidia devices by simulating all threads voting the same as the active
1744 // one.
1745 return fmt::format("utof({} ? 0xFFFFFFFFU : 0U)", value);
1746 }
1747 return fmt::format("utof(ballotThreadNV({}))", value);
1748 }
1749
1750 std::string Vote(Operation operation, const char* func) {
1751 const std::string value = VisitOperand(operation, 0, Type::Bool);
1752 if (!device.HasWarpIntrinsics()) {
1753 LOG_ERROR(Render_OpenGL,
1754 "Nvidia vote intrinsics are not available and its required by a shader");
1755 // Stub with a warp size of one.
1756 return value;
1757 }
1758 return fmt::format("{}({})", func, value);
1759 }
1760
1761 std::string VoteAll(Operation operation) {
1762 return Vote(operation, "allThreadsNV");
1763 }
1764
1765 std::string VoteAny(Operation operation) {
1766 return Vote(operation, "anyThreadNV");
1767 }
1768
1769 std::string VoteEqual(Operation operation) {
1770 if (!device.HasWarpIntrinsics()) {
1771 LOG_ERROR(Render_OpenGL,
1772 "Nvidia vote intrinsics are not available and its required by a shader");
1773 // We must return true here since a stub for a theoretical warp size of 1 will always
1774 // return an equal result for all its votes.
1775 return "true";
1776 }
1777 return Vote(operation, "allThreadsEqualNV");
1778 }
1779
1780 static constexpr std::array operation_decompilers = {
1661 &GLSLDecompiler::Assign, 1781 &GLSLDecompiler::Assign,
1662 1782
1663 &GLSLDecompiler::Select, 1783 &GLSLDecompiler::Select,
@@ -1669,6 +1789,8 @@ private:
1669 &GLSLDecompiler::Negate<Type::Float>, 1789 &GLSLDecompiler::Negate<Type::Float>,
1670 &GLSLDecompiler::Absolute<Type::Float>, 1790 &GLSLDecompiler::Absolute<Type::Float>,
1671 &GLSLDecompiler::FClamp, 1791 &GLSLDecompiler::FClamp,
1792 &GLSLDecompiler::FCastHalf0,
1793 &GLSLDecompiler::FCastHalf1,
1672 &GLSLDecompiler::Min<Type::Float>, 1794 &GLSLDecompiler::Min<Type::Float>,
1673 &GLSLDecompiler::Max<Type::Float>, 1795 &GLSLDecompiler::Max<Type::Float>,
1674 &GLSLDecompiler::FCos, 1796 &GLSLDecompiler::FCos,
@@ -1729,6 +1851,7 @@ private:
1729 &GLSLDecompiler::Absolute<Type::HalfFloat>, 1851 &GLSLDecompiler::Absolute<Type::HalfFloat>,
1730 &GLSLDecompiler::HNegate, 1852 &GLSLDecompiler::HNegate,
1731 &GLSLDecompiler::HClamp, 1853 &GLSLDecompiler::HClamp,
1854 &GLSLDecompiler::HCastFloat,
1732 &GLSLDecompiler::HUnpack, 1855 &GLSLDecompiler::HUnpack,
1733 &GLSLDecompiler::HMergeF32, 1856 &GLSLDecompiler::HMergeF32,
1734 &GLSLDecompiler::HMergeH0, 1857 &GLSLDecompiler::HMergeH0,
@@ -1741,8 +1864,7 @@ private:
1741 &GLSLDecompiler::LogicalXor, 1864 &GLSLDecompiler::LogicalXor,
1742 &GLSLDecompiler::LogicalNegate, 1865 &GLSLDecompiler::LogicalNegate,
1743 &GLSLDecompiler::LogicalPick2, 1866 &GLSLDecompiler::LogicalPick2,
1744 &GLSLDecompiler::LogicalAll2, 1867 &GLSLDecompiler::LogicalAnd2,
1745 &GLSLDecompiler::LogicalAny2,
1746 1868
1747 &GLSLDecompiler::LogicalLessThan<Type::Float>, 1869 &GLSLDecompiler::LogicalLessThan<Type::Float>,
1748 &GLSLDecompiler::LogicalEqual<Type::Float>, 1870 &GLSLDecompiler::LogicalEqual<Type::Float>,
@@ -1789,6 +1911,7 @@ private:
1789 &GLSLDecompiler::ImageStore, 1911 &GLSLDecompiler::ImageStore,
1790 1912
1791 &GLSLDecompiler::Branch, 1913 &GLSLDecompiler::Branch,
1914 &GLSLDecompiler::BranchIndirect,
1792 &GLSLDecompiler::PushFlowStack, 1915 &GLSLDecompiler::PushFlowStack,
1793 &GLSLDecompiler::PopFlowStack, 1916 &GLSLDecompiler::PopFlowStack,
1794 &GLSLDecompiler::Exit, 1917 &GLSLDecompiler::Exit,
@@ -1804,7 +1927,13 @@ private:
1804 &GLSLDecompiler::WorkGroupId<0>, 1927 &GLSLDecompiler::WorkGroupId<0>,
1805 &GLSLDecompiler::WorkGroupId<1>, 1928 &GLSLDecompiler::WorkGroupId<1>,
1806 &GLSLDecompiler::WorkGroupId<2>, 1929 &GLSLDecompiler::WorkGroupId<2>,
1930
1931 &GLSLDecompiler::BallotThread,
1932 &GLSLDecompiler::VoteAll,
1933 &GLSLDecompiler::VoteAny,
1934 &GLSLDecompiler::VoteEqual,
1807 }; 1935 };
1936 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1808 1937
1809 std::string GetRegister(u32 index) const { 1938 std::string GetRegister(u32 index) const {
1810 return GetDeclarationWithSuffix(index, "gpr"); 1939 return GetDeclarationWithSuffix(index, "gpr");
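
Switching operation_decompilers from the explicitly sized OperationDecompilersArray to static constexpr std::array with deduction lets entries be added without touching a size constant, while the new static_assert still pins the table to OperationCode::Amount. A toy reproduction of the idiom:

#include <array>
#include <cstddef>

enum class OperationCode { Assign, Select, Amount }; // toy enum for illustration

struct Decompiler {
    const char* Assign() { return "assign"; }
    const char* Select() { return "select"; }

    using Handler = const char* (Decompiler::*)();
    // CTAD deduces std::array<Handler, 2>; the assert catches a missing or extra entry.
    static constexpr std::array operation_decompilers = {
        &Decompiler::Assign,
        &Decompiler::Select,
    };
    static_assert(operation_decompilers.size() ==
                  static_cast<std::size_t>(OperationCode::Amount));
};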
@@ -1869,7 +1998,7 @@ private:
1869 } 1998 }
1870 1999
1871 u32 GetNumPhysicalInputAttributes() const { 2000 u32 GetNumPhysicalInputAttributes() const {
1872 return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); 2001 return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
1873 } 2002 }
1874 2003
1875 u32 GetNumPhysicalAttributes() const { 2004 u32 GetNumPhysicalAttributes() const {
@@ -1882,7 +2011,7 @@ private:
1882 2011
1883 const Device& device; 2012 const Device& device;
1884 const ShaderIR& ir; 2013 const ShaderIR& ir;
1885 const ShaderStage stage; 2014 const ProgramType stage;
1886 const std::string suffix; 2015 const std::string suffix;
1887 const Header header; 2016 const Header header;
1888 2017
@@ -1913,7 +2042,7 @@ std::string GetCommonDeclarations() {
1913 MAX_CONSTBUFFER_ELEMENTS); 2042 MAX_CONSTBUFFER_ELEMENTS);
1914} 2043}
1915 2044
1916ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage, 2045ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
1917 const std::string& suffix) { 2046 const std::string& suffix) {
1918 GLSLDecompiler decompiler(device, ir, stage, suffix); 2047 GLSLDecompiler decompiler(device, ir, stage, suffix);
1919 decompiler.Decompile(); 2048 decompiler.Decompile();
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 14d11c7fc..2ea02f5bf 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -12,14 +12,26 @@
12#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/shader/shader_ir.h" 13#include "video_core/shader/shader_ir.h"
14 14
15namespace OpenGL {
16class Device;
17}
18
19namespace VideoCommon::Shader { 15namespace VideoCommon::Shader {
20class ShaderIR; 16class ShaderIR;
21} 17}
22 18
19namespace OpenGL {
20
21class Device;
22
23enum class ProgramType : u32 {
24 VertexA = 0,
25 VertexB = 1,
26 TessellationControl = 2,
27 TessellationEval = 3,
28 Geometry = 4,
29 Fragment = 5,
30 Compute = 6
31};
32
33} // namespace OpenGL
34
23namespace OpenGL::GLShader { 35namespace OpenGL::GLShader {
24 36
25struct ShaderEntries; 37struct ShaderEntries;
@@ -78,12 +90,13 @@ struct ShaderEntries {
78 std::vector<ImageEntry> images; 90 std::vector<ImageEntry> images;
79 std::vector<GlobalMemoryEntry> global_memory_entries; 91 std::vector<GlobalMemoryEntry> global_memory_entries;
80 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 92 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
93 bool shader_viewport_layer_array{};
81 std::size_t shader_length{}; 94 std::size_t shader_length{};
82}; 95};
83 96
84std::string GetCommonDeclarations(); 97std::string GetCommonDeclarations();
85 98
86ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, 99ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
87 Maxwell::ShaderStage stage, const std::string& suffix); 100 ProgramType stage, const std::string& suffix);
88 101
89} // namespace OpenGL::GLShader 102} // namespace OpenGL::GLShader
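
Moving from Maxwell::ShaderStage to the renderer-local ProgramType gives one enum that covers both the graphics stages and compute, and the VertexA/VertexB split is recovered with small helpers instead of a separate stage type. A sketch of the checks built on top of the enum above (IsGraphics is a hypothetical extra for contrast, not in the diff):

enum class ProgramType : unsigned { // as declared above; the real header uses u32
    VertexA, VertexB, TessellationControl, TessellationEval, Geometry, Fragment, Compute
};

constexpr bool IsVertexShader(ProgramType stage) {
    return stage == ProgramType::VertexA || stage == ProgramType::VertexB;
}

constexpr bool IsGraphics(ProgramType stage) { // hypothetical helper, not in the diff
    return stage != ProgramType::Compute;
}

static_assert(IsVertexShader(ProgramType::VertexA) && !IsVertexShader(ProgramType::Geometry));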
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 10688397b..969fe9ced 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -51,7 +51,7 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
51 51
52} // namespace 52} // namespace
53 53
54ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, 54ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
55 u32 program_code_size, u32 program_code_size_b, 55 u32 program_code_size, u32 program_code_size_b,
56 ProgramCode program_code, ProgramCode program_code_b) 56 ProgramCode program_code, ProgramCode program_code_b)
57 : unique_identifier{unique_identifier}, program_type{program_type}, 57 : unique_identifier{unique_identifier}, program_type{program_type},
@@ -373,6 +373,12 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
373 } 373 }
374 } 374 }
375 375
376 bool shader_viewport_layer_array{};
377 if (!LoadObjectFromPrecompiled(shader_viewport_layer_array)) {
378 return {};
379 }
380 entry.entries.shader_viewport_layer_array = shader_viewport_layer_array;
381
376 u64 shader_length{}; 382 u64 shader_length{};
377 if (!LoadObjectFromPrecompiled(shader_length)) { 383 if (!LoadObjectFromPrecompiled(shader_length)) {
378 return {}; 384 return {};
@@ -445,6 +451,10 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
445 } 451 }
446 } 452 }
447 453
454 if (!SaveObjectToPrecompiled(entries.shader_viewport_layer_array)) {
455 return false;
456 }
457
448 if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { 458 if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) {
449 return false; 459 return false;
450 } 460 }
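
Because the precompiled file is a flat stream, the new shader_viewport_layer_array field must be written and read at exactly the same position, and the version hash from GetShaderCacheVersionHash is what invalidates files produced before the format grew. A toy sketch of the symmetric pattern, with SaveObject/LoadObject as hypothetical stand-ins for the real byte-stream helpers:

#include <cstddef>
#include <optional>
#include <vector>

// Hypothetical helpers mirroring SaveObjectToPrecompiled/LoadObjectFromPrecompiled.
void SaveObject(std::vector<unsigned char>& out, bool value) {
    out.push_back(value ? 1 : 0);
}

std::optional<bool> LoadObject(const std::vector<unsigned char>& in, std::size_t& cursor) {
    if (cursor >= in.size()) {
        return std::nullopt; // truncated file: abort, as the real loader returns {}
    }
    return in[cursor++] != 0;
}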
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 4f296dda6..cc8bbd61e 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -18,7 +18,6 @@
18#include "common/assert.h" 18#include "common/assert.h"
19#include "common/common_types.h" 19#include "common/common_types.h"
20#include "core/file_sys/vfs_vector.h" 20#include "core/file_sys/vfs_vector.h"
21#include "video_core/engines/maxwell_3d.h"
22#include "video_core/renderer_opengl/gl_shader_gen.h" 21#include "video_core/renderer_opengl/gl_shader_gen.h"
23 22
24namespace Core { 23namespace Core {
@@ -34,14 +33,11 @@ namespace OpenGL {
34struct ShaderDiskCacheUsage; 33struct ShaderDiskCacheUsage;
35struct ShaderDiskCacheDump; 34struct ShaderDiskCacheDump;
36 35
37using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
38
39using ProgramCode = std::vector<u64>; 36using ProgramCode = std::vector<u64>;
40using Maxwell = Tegra::Engines::Maxwell3D::Regs; 37using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
41
42using TextureBufferUsage = std::bitset<64>; 38using TextureBufferUsage = std::bitset<64>;
43 39
44/// Allocated bindings used by an OpenGL shader program. 40/// Allocated bindings used by an OpenGL shader program
45struct BaseBindings { 41struct BaseBindings {
46 u32 cbuf{}; 42 u32 cbuf{};
47 u32 gmem{}; 43 u32 gmem{};
@@ -126,7 +122,7 @@ namespace OpenGL {
126/// Describes how a shader is used by the guest GPU 122/// Describes how a shader is used by the guest GPU
127class ShaderDiskCacheRaw { 123class ShaderDiskCacheRaw {
128public: 124public:
129 explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, 125 explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
130 u32 program_code_size, u32 program_code_size_b, 126 u32 program_code_size, u32 program_code_size_b,
131 ProgramCode program_code, ProgramCode program_code_b); 127 ProgramCode program_code, ProgramCode program_code_b);
132 ShaderDiskCacheRaw(); 128 ShaderDiskCacheRaw();
@@ -141,30 +137,13 @@ public:
141 } 137 }
142 138
143 bool HasProgramA() const { 139 bool HasProgramA() const {
144 return program_type == Maxwell::ShaderProgram::VertexA; 140 return program_type == ProgramType::VertexA;
145 } 141 }
146 142
147 Maxwell::ShaderProgram GetProgramType() const { 143 ProgramType GetProgramType() const {
148 return program_type; 144 return program_type;
149 } 145 }
150 146
151 Maxwell::ShaderStage GetProgramStage() const {
152 switch (program_type) {
153 case Maxwell::ShaderProgram::VertexA:
154 case Maxwell::ShaderProgram::VertexB:
155 return Maxwell::ShaderStage::Vertex;
156 case Maxwell::ShaderProgram::TesselationControl:
157 return Maxwell::ShaderStage::TesselationControl;
158 case Maxwell::ShaderProgram::TesselationEval:
159 return Maxwell::ShaderStage::TesselationEval;
160 case Maxwell::ShaderProgram::Geometry:
161 return Maxwell::ShaderStage::Geometry;
162 case Maxwell::ShaderProgram::Fragment:
163 return Maxwell::ShaderStage::Fragment;
164 }
165 UNREACHABLE();
166 }
167
168 const ProgramCode& GetProgramCode() const { 147 const ProgramCode& GetProgramCode() const {
169 return program_code; 148 return program_code;
170 } 149 }
@@ -175,7 +154,7 @@ public:
175 154
176private: 155private:
177 u64 unique_identifier{}; 156 u64 unique_identifier{};
178 Maxwell::ShaderProgram program_type{}; 157 ProgramType program_type{};
179 u32 program_code_size{}; 158 u32 program_code_size{};
180 u32 program_code_size_b{}; 159 u32 program_code_size_b{};
181 160
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 9148629ec..3a8d9e1da 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -14,7 +14,8 @@ using Tegra::Engines::Maxwell3D;
14using VideoCommon::Shader::ProgramCode; 14using VideoCommon::Shader::ProgramCode;
15using VideoCommon::Shader::ShaderIR; 15using VideoCommon::Shader::ShaderIR;
16 16
17static constexpr u32 PROGRAM_OFFSET{10}; 17static constexpr u32 PROGRAM_OFFSET = 10;
18static constexpr u32 COMPUTE_OFFSET = 0;
18 19
19ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { 20ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
20 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 21 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
@@ -29,17 +30,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
29}; 30};
30 31
31)"; 32)";
32 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
33 ProgramResult program =
34 Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
35 33
34 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
35 const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB;
36 ProgramResult program = Decompile(device, program_ir, stage, "vertex");
36 out += program.first; 37 out += program.first;
37 38
38 if (setup.IsDualProgram()) { 39 if (setup.IsDualProgram()) {
39 const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET); 40 const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b);
40 ProgramResult program_b = 41 ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b");
41 Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
42
43 out += program_b.first; 42 out += program_b.first;
44 } 43 }
45 44
@@ -80,9 +79,9 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
80}; 79};
81 80
82)"; 81)";
83 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); 82
84 ProgramResult program = 83 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
85 Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); 84 ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry");
86 out += program.first; 85 out += program.first;
87 86
88 out += R"( 87 out += R"(
@@ -115,10 +114,8 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
115}; 114};
116 115
117)"; 116)";
118 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); 117 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
119 ProgramResult program = 118 ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment");
120 Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
121
122 out += program.first; 119 out += program.first;
123 120
124 out += R"( 121 out += R"(
@@ -130,4 +127,22 @@ void main() {
130 return {std::move(out), std::move(program.second)}; 127 return {std::move(out), std::move(program.second)};
131} 128}
132 129
130ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) {
131 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
132
133 std::string out = "// Shader Unique Id: CS" + id + "\n\n";
134 out += GetCommonDeclarations();
135
136 const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a);
137 ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute");
138 out += program.first;
139
140 out += R"(
141void main() {
142 execute_compute();
143}
144)";
145 return {std::move(out), std::move(program.second)};
146}
147
133} // namespace OpenGL::GLShader 148} // namespace OpenGL::GLShader
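
GenerateComputeShader assembles its output the same way as the graphics generators: a unique-id banner, the common declarations, the decompiled body, and a main() that calls the generated execute_compute(); the layout (local_size_variable) line added in gl_shader_cache.cpp is what makes variable-group-size dispatch legal. A trimmed sketch of the string assembly (the execute_compute entry point is taken from the diff; the rest of the body is elided):

#include <string>

#include <fmt/format.h>

std::string BuildComputeSource(unsigned long long unique_identifier, const std::string& body) {
    std::string out = fmt::format("// Shader Unique Id: CS{:016x}\n\n", unique_identifier);
    out += body; // common declarations plus the decompiled program defining execute_compute()
    out += R"(
void main() {
    execute_compute();
}
)";
    return out;
}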
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 0536c8a03..3833e88ab 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -27,6 +27,8 @@ struct ShaderSetup {
27 ProgramCode code; 27 ProgramCode code;
28 ProgramCode code_b; // Used for dual vertex shaders 28 ProgramCode code_b; // Used for dual vertex shaders
29 u64 unique_identifier; 29 u64 unique_identifier;
30 std::size_t size_a;
31 std::size_t size_b;
30 } program; 32 } program;
31 33
32 /// Used in scenarios where we have dual vertex shaders 34 /// Used in scenarios where we have dual vertex shaders
@@ -52,4 +54,7 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se
52/// Generates the GLSL fragment shader program source code for the given FS program 54/// Generates the GLSL fragment shader program source code for the given FS program
53ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); 55ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup);
54 56
57/// Generates the GLSL compute shader program source code for the given CS program
58ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup);
59
55} // namespace OpenGL::GLShader 60} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 5f3fe067e..9e74eda0d 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -10,21 +10,25 @@
10 10
11namespace OpenGL::GLShader { 11namespace OpenGL::GLShader {
12 12
13GLuint LoadShader(const char* source, GLenum type) { 13namespace {
14 const char* debug_type; 14const char* GetStageDebugName(GLenum type) {
15 switch (type) { 15 switch (type) {
16 case GL_VERTEX_SHADER: 16 case GL_VERTEX_SHADER:
17 debug_type = "vertex"; 17 return "vertex";
18 break;
19 case GL_GEOMETRY_SHADER: 18 case GL_GEOMETRY_SHADER:
20 debug_type = "geometry"; 19 return "geometry";
21 break;
22 case GL_FRAGMENT_SHADER: 20 case GL_FRAGMENT_SHADER:
23 debug_type = "fragment"; 21 return "fragment";
24 break; 22 case GL_COMPUTE_SHADER:
25 default: 23 return "compute";
26 UNREACHABLE();
27 } 24 }
25 UNIMPLEMENTED();
26 return "unknown";
27}
28} // Anonymous namespace
29
30GLuint LoadShader(const char* source, GLenum type) {
31 const char* debug_type = GetStageDebugName(type);
28 const GLuint shader_id = glCreateShader(type); 32 const GLuint shader_id = glCreateShader(type);
29 glShaderSource(shader_id, 1, &source, nullptr); 33 glShaderSource(shader_id, 1, &source, nullptr);
30 LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); 34 LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
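
Hoisting the switch into GetStageDebugName keeps LoadShader on a single path and makes GL_COMPUTE_SHADER a one-line addition. The lookup, reproduced with raw GLenum values so it stands alone (the real code uses the glad-provided macros):

// GLenum values per the OpenGL registry; comments name the corresponding macros.
const char* StageDebugName(unsigned type) {
    switch (type) {
    case 0x8B31: return "vertex";   // GL_VERTEX_SHADER
    case 0x8DD9: return "geometry"; // GL_GEOMETRY_SHADER
    case 0x8B30: return "fragment"; // GL_FRAGMENT_SHADER
    case 0x91B9: return "compute";  // GL_COMPUTE_SHADER
    }
    return "unknown"; // the real code raises UNIMPLEMENTED() before falling back
}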
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index d86e137ac..f4777d0b0 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -6,8 +6,11 @@
6#include <glad/glad.h> 6#include <glad/glad.h>
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "common/microprofile.h"
9#include "video_core/renderer_opengl/gl_state.h" 10#include "video_core/renderer_opengl/gl_state.h"
10 11
12MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128));
13
11namespace OpenGL { 14namespace OpenGL {
12 15
13using Maxwell = Tegra::Engines::Maxwell3D::Regs; 16using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -162,6 +165,25 @@ OpenGLState::OpenGLState() {
162 alpha_test.ref = 0.0f; 165 alpha_test.ref = 0.0f;
163} 166}
164 167
168void OpenGLState::SetDefaultViewports() {
169 for (auto& item : viewports) {
170 item.x = 0;
171 item.y = 0;
172 item.width = 0;
173 item.height = 0;
174 item.depth_range_near = 0.0f;
175 item.depth_range_far = 1.0f;
176 item.scissor.enabled = false;
177 item.scissor.x = 0;
178 item.scissor.y = 0;
179 item.scissor.width = 0;
180 item.scissor.height = 0;
181 }
182
183 depth_clamp.far_plane = false;
184 depth_clamp.near_plane = false;
185}
186
165void OpenGLState::ApplyDefaultState() { 187void OpenGLState::ApplyDefaultState() {
166 glEnable(GL_BLEND); 188 glEnable(GL_BLEND);
167 glDisable(GL_FRAMEBUFFER_SRGB); 189 glDisable(GL_FRAMEBUFFER_SRGB);
@@ -523,7 +545,8 @@ void OpenGLState::ApplySamplers() const {
523 } 545 }
524} 546}
525 547
526void OpenGLState::Apply() const { 548void OpenGLState::Apply() {
549 MICROPROFILE_SCOPE(OpenGL_State);
527 ApplyFramebufferState(); 550 ApplyFramebufferState();
528 ApplyVertexArrayState(); 551 ApplyVertexArrayState();
529 ApplyShaderProgram(); 552 ApplyShaderProgram();
@@ -532,19 +555,31 @@ void OpenGLState::Apply() const {
532 ApplyPointSize(); 555 ApplyPointSize();
533 ApplyFragmentColorClamp(); 556 ApplyFragmentColorClamp();
534 ApplyMultisample(); 557 ApplyMultisample();
558 if (dirty.color_mask) {
559 ApplyColorMask();
560 dirty.color_mask = false;
561 }
535 ApplyDepthClamp(); 562 ApplyDepthClamp();
536 ApplyColorMask();
537 ApplyViewport(); 563 ApplyViewport();
538 ApplyStencilTest(); 564 if (dirty.stencil_state) {
565 ApplyStencilTest();
566 dirty.stencil_state = false;
567 }
539 ApplySRgb(); 568 ApplySRgb();
540 ApplyCulling(); 569 ApplyCulling();
541 ApplyDepth(); 570 ApplyDepth();
542 ApplyPrimitiveRestart(); 571 ApplyPrimitiveRestart();
543 ApplyBlending(); 572 if (dirty.blend_state) {
573 ApplyBlending();
574 dirty.blend_state = false;
575 }
544 ApplyLogicOp(); 576 ApplyLogicOp();
545 ApplyTextures(); 577 ApplyTextures();
546 ApplySamplers(); 578 ApplySamplers();
547 ApplyPolygonOffset(); 579 if (dirty.polygon_offset) {
580 ApplyPolygonOffset();
581 dirty.polygon_offset = false;
582 }
548 ApplyAlphaTest(); 583 ApplyAlphaTest();
549} 584}
550 585
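
Apply() now consults per-group dirty bits before the expensive sub-applies and clears each bit after use, so back-to-back Apply() calls with unchanged state skip most of the GL traffic. The control flow, reduced to a single flag:

// Dirty-flag gating sketch; one flag shown, the real state tracks several groups.
struct StateSketch {
    struct {
        bool blend_state = true; // start dirty so the first Apply() uploads everything
    } dirty;

    void ApplyBlending() { /* the glEnable/glBlendFuncSeparate calls would go here */ }

    void Apply() {
        if (dirty.blend_state) {
            ApplyBlending();
            dirty.blend_state = false; // later Apply() calls skip this block
        }
    }
};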
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index b0140495d..fdf9a8a12 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -195,8 +195,9 @@ public:
195 s_rgb_used = false; 195 s_rgb_used = false;
196 } 196 }
197 197
198 void SetDefaultViewports();
198 /// Apply this state as the current OpenGL state 199 /// Apply this state as the current OpenGL state
199 void Apply() const; 200 void Apply();
200 201
201 void ApplyFramebufferState() const; 202 void ApplyFramebufferState() const;
202 void ApplyVertexArrayState() const; 203 void ApplyVertexArrayState() const;
@@ -237,11 +238,41 @@ public:
237 /// Viewport does not affect glClearBuffer so emulate viewport using scissor test 238 /// Viewport does not affect glClearBuffer so emulate viewport using scissor test
238 void EmulateViewportWithScissor(); 239 void EmulateViewportWithScissor();
239 240
241 void MarkDirtyBlendState() {
242 dirty.blend_state = true;
243 }
244
245 void MarkDirtyStencilState() {
246 dirty.stencil_state = true;
247 }
248
249 void MarkDirtyPolygonOffset() {
250 dirty.polygon_offset = true;
251 }
252
253 void MarkDirtyColorMask() {
254 dirty.color_mask = true;
255 }
256
257 void AllDirty() {
258 dirty.blend_state = true;
259 dirty.stencil_state = true;
260 dirty.polygon_offset = true;
261 dirty.color_mask = true;
262 }
263
240private: 264private:
241 static OpenGLState cur_state; 265 static OpenGLState cur_state;
242 266
243 // Workaround for sRGB problems caused by Qt not supporting sRGB output 267 // Workaround for sRGB problems caused by Qt not supporting sRGB output
244 static bool s_rgb_used; 268 static bool s_rgb_used;
269 struct {
270 bool blend_state;
271 bool stencil_state;
272 bool viewport_state;
273 bool polygon_offset;
274 bool color_mask;
275 } dirty{};
245}; 276};
246 277
247} // namespace OpenGL 278} // namespace OpenGL
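Since Apply() now trusts these flags, any caller that captures a previous OpenGLState and restores it (or that issues raw GL calls the tracker cannot see) must call AllDirty() before Apply(), exactly as the texture-cache and renderer hunks below do. A toy model of why the restore needs it:

    #include <iostream>

    struct TrackedState {
        bool dirty = false;
        void AllDirty() { dirty = true; }
        void Apply() {
            if (dirty) {
                std::cout << "full state re-applied\n";
                dirty = false;
            }
        }
    };

    int main() {
        TrackedState prev; // captured before some raw GL work
        // ... raw glEnable/glBindFramebuffer calls the tracker never saw ...
        prev.AllDirty();   // stale flags would otherwise skip the restore
        prev.Apply();      // guarantees the driver matches `prev` again
    }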
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 08ae1a429..4f135fe03 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -31,6 +31,8 @@ using VideoCore::Surface::SurfaceType;
31 31
32MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); 32MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128));
33MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); 33MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128));
34MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
35 MP_RGB(128, 192, 128));
34 36
35namespace { 37namespace {
36 38
@@ -135,7 +137,6 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
135const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { 137const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
136 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); 138 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
137 const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]}; 139 const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]};
138 ASSERT(component_type == format.component_type);
139 return format; 140 return format;
140} 141}
141 142
@@ -183,6 +184,9 @@ GLint GetSwizzleSource(SwizzleSource source) {
183} 184}
184 185
185void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { 186void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {
187 if (params.IsBuffer()) {
188 return;
189 }
186 glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); 190 glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
187 glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 191 glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
188 glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 192 glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
@@ -207,6 +211,7 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte
207 glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(), 211 glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(),
208 nullptr, GL_DYNAMIC_STORAGE_BIT); 212 nullptr, GL_DYNAMIC_STORAGE_BIT);
209 glTextureBuffer(texture.handle, internal_format, texture_buffer.handle); 213 glTextureBuffer(texture.handle, internal_format, texture_buffer.handle);
214 break;
210 case SurfaceTarget::Texture2D: 215 case SurfaceTarget::Texture2D:
211 case SurfaceTarget::TextureCubemap: 216 case SurfaceTarget::TextureCubemap:
212 glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width, 217 glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width,
@@ -483,11 +488,15 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
483 const auto& dst_params{dst_view->GetSurfaceParams()}; 488 const auto& dst_params{dst_view->GetSurfaceParams()};
484 489
485 OpenGLState prev_state{OpenGLState::GetCurState()}; 490 OpenGLState prev_state{OpenGLState::GetCurState()};
486 SCOPE_EXIT({ prev_state.Apply(); }); 491 SCOPE_EXIT({
492 prev_state.AllDirty();
493 prev_state.Apply();
494 });
487 495
488 OpenGLState state; 496 OpenGLState state;
489 state.draw.read_framebuffer = src_framebuffer.handle; 497 state.draw.read_framebuffer = src_framebuffer.handle;
490 state.draw.draw_framebuffer = dst_framebuffer.handle; 498 state.draw.draw_framebuffer = dst_framebuffer.handle;
499 state.AllDirty();
491 state.Apply(); 500 state.Apply();
492 501
493 u32 buffers{}; 502 u32 buffers{};
@@ -535,6 +544,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
535} 544}
536 545
537void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { 546void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
547 MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy);
538 const auto& src_params = src_surface->GetSurfaceParams(); 548 const auto& src_params = src_surface->GetSurfaceParams();
539 const auto& dst_params = dst_surface->GetSurfaceParams(); 549 const auto& dst_params = dst_surface->GetSurfaceParams();
540 UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); 550 UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);
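BufferCopy gains a profiler scope to match the existing upload/download counters. The define/scope pair follows the same shape everywhere in this diff; a compact sketch:

    #include "common/microprofile.h" // yuzu's wrapper around microprofile

    // One-time definition: group "OpenGL", label, and a display color.
    MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
                        MP_RGB(128, 192, 128));

    void CopyBuffers() {
        // RAII timer: everything up to the closing brace is attributed
        // to the OpenGL_Texture_Buffer_Copy counter.
        MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy);
        // ... the actual buffer copy work ...
    }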
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index ff6ab6988..21324488a 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -51,7 +51,7 @@ public:
51 } 51 }
52 52
53protected: 53protected:
54 void DecorateSurfaceName(); 54 void DecorateSurfaceName() override;
55 55
56 View CreateView(const ViewParams& view_key) override; 56 View CreateView(const ViewParams& view_key) override;
57 View CreateViewInner(const ViewParams& view_key, bool is_proxy); 57 View CreateViewInner(const ViewParams& view_key, bool is_proxy);
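Marking DecorateSurfaceName() with `override` is more than style: the compiler now rejects the declaration if the base-class virtual is ever renamed or re-typed, instead of silently leaving a dead non-override behind. A tiny illustration with made-up types:

    struct SurfaceBase {
        virtual ~SurfaceBase() = default;
        virtual void DecorateSurfaceName() = 0;
    };

    struct CachedSurface : SurfaceBase {
        // If the base signature changes, this line becomes a compile error
        // rather than a new, never-called function.
        void DecorateSurfaceName() override {}
    };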
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index b142521ec..af9684839 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -101,21 +101,19 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst
101 101
102RendererOpenGL::~RendererOpenGL() = default; 102RendererOpenGL::~RendererOpenGL() = default;
103 103
104/// Swap buffers (render frame) 104void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
105void RendererOpenGL::SwapBuffers(
106 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
107
108 system.GetPerfStats().EndSystemFrame(); 105 system.GetPerfStats().EndSystemFrame();
109 106
110 // Maintain the rasterizer's state as a priority 107 // Maintain the rasterizer's state as a priority
111 OpenGLState prev_state = OpenGLState::GetCurState(); 108 OpenGLState prev_state = OpenGLState::GetCurState();
109 state.AllDirty();
112 state.Apply(); 110 state.Apply();
113 111
114 if (framebuffer) { 112 if (framebuffer) {
115 // If framebuffer is provided, reload it from memory to a texture 113 // If framebuffer is provided, reload it from memory to a texture
116 if (screen_info.texture.width != (GLsizei)framebuffer->get().width || 114 if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) ||
117 screen_info.texture.height != (GLsizei)framebuffer->get().height || 115 screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) ||
118 screen_info.texture.pixel_format != framebuffer->get().pixel_format) { 116 screen_info.texture.pixel_format != framebuffer->pixel_format) {
119 // Reallocate texture if the framebuffer size has changed. 117 // Reallocate texture if the framebuffer size has changed.
120 // This is expected to not happen very often and hence should not be a 118 // This is expected to not happen very often and hence should not be a
121 // performance problem. 119 // performance problem.
@@ -130,6 +128,8 @@ void RendererOpenGL::SwapBuffers(
130 128
131 DrawScreen(render_window.GetFramebufferLayout()); 129 DrawScreen(render_window.GetFramebufferLayout());
132 130
131 rasterizer->TickFrame();
132
133 render_window.SwapBuffers(); 133 render_window.SwapBuffers();
134 } 134 }
135 135
@@ -139,6 +139,7 @@ void RendererOpenGL::SwapBuffers(
139 system.GetPerfStats().BeginSystemFrame(); 139 system.GetPerfStats().BeginSystemFrame();
140 140
141 // Restore the rasterizer state 141 // Restore the rasterizer state
142 prev_state.AllDirty();
142 prev_state.Apply(); 143 prev_state.Apply();
143} 144}
144 145
@@ -146,43 +147,43 @@ void RendererOpenGL::SwapBuffers(
146 * Loads framebuffer from emulated memory into the active OpenGL texture. 147 * Loads framebuffer from emulated memory into the active OpenGL texture.
147 */ 148 */
148void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { 149void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
149 const u32 bytes_per_pixel{Tegra::FramebufferConfig::BytesPerPixel(framebuffer.pixel_format)};
150 const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
151 const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
152
153 // Framebuffer orientation handling 150 // Framebuffer orientation handling
154 framebuffer_transform_flags = framebuffer.transform_flags; 151 framebuffer_transform_flags = framebuffer.transform_flags;
155 framebuffer_crop_rect = framebuffer.crop_rect; 152 framebuffer_crop_rect = framebuffer.crop_rect;
156 153
157 // Ensure no bad interactions with GL_UNPACK_ALIGNMENT, which by default 154 const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
158 // only allows rows to have a memory alignment of 4. 155 if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
159 ASSERT(framebuffer.stride % 4 == 0); 156 return;
160 157 }
161 if (!rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
162 // Reset the screen info's display texture to its own permanent texture
163 screen_info.display_texture = screen_info.texture.resource.handle;
164
165 rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes);
166
167 constexpr u32 linear_bpp = 4;
168 VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear,
169 framebuffer.width, framebuffer.height, bytes_per_pixel,
170 linear_bpp, Memory::GetPointer(framebuffer_addr),
171 gl_framebuffer_data.data());
172
173 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
174 158
175 // Update existing texture 159 // Reset the screen info's display texture to its own permanent texture
176 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that 160 screen_info.display_texture = screen_info.texture.resource.handle;
177 // they differ from the LCD resolution.
178 // TODO: Applications could theoretically crash yuzu here by specifying too large
179 // framebuffer sizes. We should make sure that this cannot happen.
180 glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
181 framebuffer.height, screen_info.texture.gl_format,
182 screen_info.texture.gl_type, gl_framebuffer_data.data());
183 161
184 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 162 const auto pixel_format{
185 } 163 VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
164 const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
165 const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
166 const auto host_ptr{Memory::GetPointer(framebuffer_addr)};
167 rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
168
169 // TODO(Rodrigo): Read this from HLE
170 constexpr u32 block_height_log2 = 4;
171 VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format,
172 framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
173 gl_framebuffer_data.data(), host_ptr);
174
175 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
176
177 // Update existing texture
178 // TODO: Test what happens on hardware when you change the framebuffer dimensions so that
179 // they differ from the LCD resolution.
180 // TODO: Applications could theoretically crash yuzu here by specifying too large
181 // framebuffer sizes. We should make sure that this cannot happen.
182 glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
183 framebuffer.height, screen_info.texture.gl_format,
184 screen_info.texture.gl_type, gl_framebuffer_data.data());
185
186 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
186} 187}
187 188
188/** 189/**
@@ -205,6 +206,7 @@ void RendererOpenGL::InitOpenGLObjects() {
205 // Link shaders and get variable locations 206 // Link shaders and get variable locations
206 shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); 207 shader.CreateFromSource(vertex_shader, nullptr, fragment_shader);
207 state.draw.shader_program = shader.handle; 208 state.draw.shader_program = shader.handle;
209 state.AllDirty();
208 state.Apply(); 210 state.Apply();
209 uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); 211 uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
210 uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture"); 212 uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
@@ -262,7 +264,6 @@ void RendererOpenGL::CreateRasterizer() {
262 if (rasterizer) { 264 if (rasterizer) {
263 return; 265 return;
264 } 266 }
265 // Initialize sRGB Usage
266 OpenGLState::ClearsRGBUsed(); 267 OpenGLState::ClearsRGBUsed();
267 rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); 268 rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info);
268} 269}
@@ -273,22 +274,29 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
273 texture.height = framebuffer.height; 274 texture.height = framebuffer.height;
274 texture.pixel_format = framebuffer.pixel_format; 275 texture.pixel_format = framebuffer.pixel_format;
275 276
277 const auto pixel_format{
278 VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
279 const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
280 gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel);
281
276 GLint internal_format; 282 GLint internal_format;
277 switch (framebuffer.pixel_format) { 283 switch (framebuffer.pixel_format) {
278 case Tegra::FramebufferConfig::PixelFormat::ABGR8: 284 case Tegra::FramebufferConfig::PixelFormat::ABGR8:
279 internal_format = GL_RGBA8; 285 internal_format = GL_RGBA8;
280 texture.gl_format = GL_RGBA; 286 texture.gl_format = GL_RGBA;
281 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; 287 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
282 gl_framebuffer_data.resize(texture.width * texture.height * 4); 288 break;
289 case Tegra::FramebufferConfig::PixelFormat::RGB565:
290 internal_format = GL_RGB565;
291 texture.gl_format = GL_RGB;
292 texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
283 break; 293 break;
284 default: 294 default:
285 internal_format = GL_RGBA8; 295 internal_format = GL_RGBA8;
286 texture.gl_format = GL_RGBA; 296 texture.gl_format = GL_RGBA;
287 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; 297 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
288 gl_framebuffer_data.resize(texture.width * texture.height * 4); 298 UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
289 LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer pixel format: {}", 299 static_cast<u32>(framebuffer.pixel_format));
290 static_cast<u32>(framebuffer.pixel_format));
291 UNREACHABLE();
292 } 300 }
293 301
294 texture.resource.Release(); 302 texture.resource.Release();
@@ -338,12 +346,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
338 // Workaround brightness problems in SMO by enabling sRGB in the final output 346 // Workaround brightness problems in SMO by enabling sRGB in the final output
339 // if it has been used in the frame. Needed because of this bug in Qt: QTBUG-50987 347 // if it has been used in the frame. Needed because of this bug in Qt: QTBUG-50987
340 state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed(); 348 state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
349 state.AllDirty();
341 state.Apply(); 350 state.Apply();
342 glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data()); 351 glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
343 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); 352 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
344 // Restore default state 353 // Restore default state
345 state.framebuffer_srgb.enabled = false; 354 state.framebuffer_srgb.enabled = false;
346 state.texture_units[0].texture = 0; 355 state.texture_units[0].texture = 0;
356 state.AllDirty();
347 state.Apply(); 357 state.Apply();
348 // Clear sRGB state for the next frame 358 // Clear sRGB state for the next frame
349 OpenGLState::ClearsRGBUsed(); 359 OpenGLState::ClearsRGBUsed();
@@ -388,6 +398,7 @@ void RendererOpenGL::CaptureScreenshot() {
388 GLuint old_read_fb = state.draw.read_framebuffer; 398 GLuint old_read_fb = state.draw.read_framebuffer;
389 GLuint old_draw_fb = state.draw.draw_framebuffer; 399 GLuint old_draw_fb = state.draw.draw_framebuffer;
390 state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; 400 state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle;
401 state.AllDirty();
391 state.Apply(); 402 state.Apply();
392 403
393 Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; 404 Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
@@ -407,6 +418,7 @@ void RendererOpenGL::CaptureScreenshot() {
407 screenshot_framebuffer.Release(); 418 screenshot_framebuffer.Release();
408 state.draw.read_framebuffer = old_read_fb; 419 state.draw.read_framebuffer = old_read_fb;
409 state.draw.draw_framebuffer = old_draw_fb; 420 state.draw.draw_framebuffer = old_draw_fb;
421 state.AllDirty();
410 state.Apply(); 422 state.Apply();
411 glDeleteRenderbuffers(1, &renderbuffer); 423 glDeleteRenderbuffers(1, &renderbuffer);
412 424
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 4aebf2321..9bd086368 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -43,14 +43,13 @@ struct ScreenInfo {
43 TextureInfo texture; 43 TextureInfo texture;
44}; 44};
45 45
46class RendererOpenGL : public VideoCore::RendererBase { 46class RendererOpenGL final : public VideoCore::RendererBase {
47public: 47public:
48 explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system); 48 explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system);
49 ~RendererOpenGL() override; 49 ~RendererOpenGL() override;
50 50
51 /// Swap buffers (render frame) 51 /// Swap buffers (render frame)
52 void SwapBuffers( 52 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
53 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
54 53
55 /// Initialize the renderer 54 /// Initialize the renderer
56 bool Init() override; 55 bool Init() override;
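The signature change trades std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> for a plain pointer, the conventional way to pass an optional non-owning reference in C++: nullptr means "nothing to present" and the ->get() indirection disappears at every use site. Sketch:

    #include <cstdio>

    struct FramebufferConfig { int width, height; };

    // Optional, non-owning parameter: nullptr means "no framebuffer this frame".
    void SwapBuffers(const FramebufferConfig* framebuffer) {
        if (framebuffer) {
            std::printf("present %dx%d\n", framebuffer->width, framebuffer->height);
        }
    }

    int main() {
        FramebufferConfig fb{1280, 720};
        SwapBuffers(&fb);     // present a frame
        SwapBuffers(nullptr); // skip presentation
    }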
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index 68c36988d..c504a2c1a 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -13,29 +13,67 @@
13 13
14namespace OpenGL { 14namespace OpenGL {
15 15
16VertexArrayPushBuffer::VertexArrayPushBuffer() = default;
17
18VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
19
20void VertexArrayPushBuffer::Setup(GLuint vao_) {
21 vao = vao_;
22 index_buffer = nullptr;
23 vertex_buffers.clear();
24}
25
26void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) {
27 index_buffer = buffer;
28}
29
30void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer,
31 GLintptr offset, GLsizei stride) {
32 vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride});
33}
34
35void VertexArrayPushBuffer::Bind() {
36 if (index_buffer) {
37 glVertexArrayElementBuffer(vao, *index_buffer);
38 }
39
40 // TODO(Rodrigo): Find a way to ARB_multi_bind this
41 for (const auto& entry : vertex_buffers) {
42 glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset,
43 entry.stride);
44 }
45}
46
16BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} 47BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
17 48
18BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; 49BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
19 50
20void BindBuffersRangePushBuffer::Setup(GLuint first_) { 51void BindBuffersRangePushBuffer::Setup(GLuint first_) {
21 first = first_; 52 first = first_;
22 buffers.clear(); 53 buffer_pointers.clear();
23 offsets.clear(); 54 offsets.clear();
24 sizes.clear(); 55 sizes.clear();
25} 56}
26 57
27void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) { 58void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) {
28 buffers.push_back(buffer); 59 buffer_pointers.push_back(buffer);
29 offsets.push_back(offset); 60 offsets.push_back(offset);
30 sizes.push_back(size); 61 sizes.push_back(size);
31} 62}
32 63
33void BindBuffersRangePushBuffer::Bind() const { 64void BindBuffersRangePushBuffer::Bind() {
34 const std::size_t count{buffers.size()}; 65 // Ensure sizes are valid.
66 const std::size_t count{buffer_pointers.size()};
35 DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); 67 DEBUG_ASSERT(count == offsets.size() && count == sizes.size());
36 if (count == 0) { 68 if (count == 0) {
37 return; 69 return;
38 } 70 }
71
72 // Dereference buffers.
73 buffers.resize(count);
74 std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(),
75 [](const GLuint* pointer) { return *pointer; });
76
39 glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), 77 glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
40 sizes.data()); 78 sizes.data());
41} 79}
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index 4a752f3b4..6c2b45546 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -11,20 +11,49 @@
11 11
12namespace OpenGL { 12namespace OpenGL {
13 13
14class BindBuffersRangePushBuffer { 14class VertexArrayPushBuffer final {
15public: 15public:
16 BindBuffersRangePushBuffer(GLenum target); 16 explicit VertexArrayPushBuffer();
17 ~VertexArrayPushBuffer();
18
19 void Setup(GLuint vao_);
20
21 void SetIndexBuffer(const GLuint* buffer);
22
23 void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset,
24 GLsizei stride);
25
26 void Bind();
27
28private:
29 struct Entry {
30 GLuint binding_index{};
31 const GLuint* buffer{};
32 GLintptr offset{};
33 GLsizei stride{};
34 };
35
36 GLuint vao{};
37 const GLuint* index_buffer{};
38 std::vector<Entry> vertex_buffers;
39};
40
41class BindBuffersRangePushBuffer final {
42public:
43 explicit BindBuffersRangePushBuffer(GLenum target);
17 ~BindBuffersRangePushBuffer(); 44 ~BindBuffersRangePushBuffer();
18 45
19 void Setup(GLuint first_); 46 void Setup(GLuint first_);
20 47
21 void Push(GLuint buffer, GLintptr offset, GLsizeiptr size); 48 void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size);
22 49
23 void Bind() const; 50 void Bind();
24 51
25private: 52private:
26 GLenum target; 53 GLenum target{};
27 GLuint first; 54 GLuint first{};
55 std::vector<const GLuint*> buffer_pointers;
56
28 std::vector<GLuint> buffers; 57 std::vector<GLuint> buffers;
29 std::vector<GLintptr> offsets; 58 std::vector<GLintptr> offsets;
30 std::vector<GLsizeiptr> sizes; 59 std::vector<GLsizeiptr> sizes;
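Both push-buffer classes now record pointers to GL handles and dereference them in Bind(), so a buffer that is created or recycled after SetVertexBuffer()/Push() still resolves to its final name at submission time. The mechanism in miniature (toy types):

    #include <cstdio>
    #include <vector>

    using Handle = unsigned; // stand-in for GLuint

    struct PushBuffer {
        std::vector<const Handle*> pointers;
        void Push(const Handle* handle) { pointers.push_back(handle); }
        void Bind() const {
            for (const Handle* p : pointers) {
                std::printf("bind %u\n", *p); // the value is read only now
            }
        }
    };

    int main() {
        Handle buffer = 0; // not created yet when it is pushed
        PushBuffer pb;
        pb.Push(&buffer);
        buffer = 42;       // "creation" happens after the push
        pb.Bind();         // prints 42, not 0
    }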
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 02a9f5ecb..d2e9f4031 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -109,8 +109,8 @@ void VKBufferCache::Reserve(std::size_t max_size) {
109 } 109 }
110} 110}
111 111
112VKExecutionContext VKBufferCache::Send(VKExecutionContext exctx) { 112void VKBufferCache::Send() {
113 return stream_buffer->Send(exctx, buffer_offset - buffer_offset_base); 113 stream_buffer->Send(buffer_offset - buffer_offset_base);
114} 114}
115 115
116void VKBufferCache::AlignBuffer(std::size_t alignment) { 116void VKBufferCache::AlignBuffer(std::size_t alignment) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 3edf460df..49f13bcdc 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -77,7 +77,7 @@ public:
77 void Reserve(std::size_t max_size); 77 void Reserve(std::size_t max_size);
78 78
79 /// Ensures that the set data is sent to the device. 79 /// Ensures that the set data is sent to the device.
80 [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx); 80 void Send();
81 81
82 /// Returns the buffer cache handle. 82 /// Returns the buffer cache handle.
83 vk::Buffer GetBuffer() const { 83 vk::Buffer GetBuffer() const {
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h
index 771b05c73..1f73b716b 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.h
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h
@@ -4,9 +4,6 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <unordered_map>
8
9#include "common/common_types.h"
10#include "video_core/renderer_vulkan/declarations.h" 7#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/sampler_cache.h" 8#include "video_core/sampler_cache.h"
12#include "video_core/textures/texture.h" 9#include "video_core/textures/texture.h"
@@ -21,9 +18,9 @@ public:
21 ~VKSamplerCache(); 18 ~VKSamplerCache();
22 19
23protected: 20protected:
24 UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const; 21 UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
25 22
26 vk::Sampler ToSamplerType(const UniqueSampler& sampler) const; 23 vk::Sampler ToSamplerType(const UniqueSampler& sampler) const override;
27 24
28private: 25private:
29 const VKDevice& device; 26 const VKDevice& device;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index f1fea1871..0f8116458 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -19,23 +19,19 @@ VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_man
19 19
20VKScheduler::~VKScheduler() = default; 20VKScheduler::~VKScheduler() = default;
21 21
22VKExecutionContext VKScheduler::GetExecutionContext() const { 22void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) {
23 return VKExecutionContext(current_fence, current_cmdbuf);
24}
25
26VKExecutionContext VKScheduler::Flush(vk::Semaphore semaphore) {
27 SubmitExecution(semaphore); 23 SubmitExecution(semaphore);
28 current_fence->Release(); 24 if (release_fence)
25 current_fence->Release();
29 AllocateNewContext(); 26 AllocateNewContext();
30 return GetExecutionContext();
31} 27}
32 28
33VKExecutionContext VKScheduler::Finish(vk::Semaphore semaphore) { 29void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) {
34 SubmitExecution(semaphore); 30 SubmitExecution(semaphore);
35 current_fence->Wait(); 31 current_fence->Wait();
36 current_fence->Release(); 32 if (release_fence)
33 current_fence->Release();
37 AllocateNewContext(); 34 AllocateNewContext();
38 return GetExecutionContext();
39} 35}
40 36
41void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { 37void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index cfaf5376f..0e5b49c7f 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -10,10 +10,43 @@
10namespace Vulkan { 10namespace Vulkan {
11 11
12class VKDevice; 12class VKDevice;
13class VKExecutionContext;
14class VKFence; 13class VKFence;
15class VKResourceManager; 14class VKResourceManager;
16 15
16class VKFenceView {
17public:
18 VKFenceView() = default;
19 VKFenceView(VKFence* const& fence) : fence{fence} {}
20
21 VKFence* operator->() const noexcept {
22 return fence;
23 }
24
25 operator VKFence&() const noexcept {
26 return *fence;
27 }
28
29private:
30 VKFence* const& fence;
31};
32
33class VKCommandBufferView {
34public:
35 VKCommandBufferView() = default;
36 VKCommandBufferView(const vk::CommandBuffer& cmdbuf) : cmdbuf{cmdbuf} {}
37
38 const vk::CommandBuffer* operator->() const noexcept {
39 return &cmdbuf;
40 }
41
42 operator vk::CommandBuffer() const noexcept {
43 return cmdbuf;
44 }
45
46private:
47 const vk::CommandBuffer& cmdbuf;
48};
49
17/// The scheduler abstracts command buffer and fence management with an interface that's able to do 50/// The scheduler abstracts command buffer and fence management with an interface that's able to do
18/// OpenGL-like operations on Vulkan command buffers. 51/// OpenGL-like operations on Vulkan command buffers.
19class VKScheduler { 52class VKScheduler {
@@ -21,16 +54,21 @@ public:
21 explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager); 54 explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
22 ~VKScheduler(); 55 ~VKScheduler();
23 56
24 /// Gets the current execution context. 57 /// Gets a reference to the current fence.
25 [[nodiscard]] VKExecutionContext GetExecutionContext() const; 58 VKFenceView GetFence() const {
59 return current_fence;
60 }
61
62 /// Gets a reference to the current command buffer.
63 VKCommandBufferView GetCommandBuffer() const {
64 return current_cmdbuf;
65 }
26 66
27 /// Sends the current execution context to the GPU. It invalidates the current execution context 67 /// Sends the current execution context to the GPU.
28 /// and returns a new one. 68 void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr);
29 VKExecutionContext Flush(vk::Semaphore semaphore = nullptr);
30 69
31 /// Sends the current execution context to the GPU and waits for it to complete. It invalidates 70 /// Sends the current execution context to the GPU and waits for it to complete.
32 /// the current execution context and returns a new one. 71 void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr);
33 VKExecutionContext Finish(vk::Semaphore semaphore = nullptr);
34 72
35private: 73private:
36 void SubmitExecution(vk::Semaphore semaphore); 74 void SubmitExecution(vk::Semaphore semaphore);
@@ -44,26 +82,4 @@ private:
44 VKFence* next_fence = nullptr; 82 VKFence* next_fence = nullptr;
45}; 83};
46 84
47class VKExecutionContext {
48 friend class VKScheduler;
49
50public:
51 VKExecutionContext() = default;
52
53 VKFence& GetFence() const {
54 return *fence;
55 }
56
57 vk::CommandBuffer GetCommandBuffer() const {
58 return cmdbuf;
59 }
60
61private:
62 explicit VKExecutionContext(VKFence* fence, vk::CommandBuffer cmdbuf)
63 : fence{fence}, cmdbuf{cmdbuf} {}
64
65 VKFence* fence{};
66 vk::CommandBuffer cmdbuf;
67};
68
69} // namespace Vulkan 85} // namespace Vulkan
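VKFenceView holds a `VKFence* const&`, a reference to the scheduler's current_fence pointer rather than a copy of its value, and VKCommandBufferView does the same for the command buffer. When AllocateNewContext() repoints those members, every live view follows automatically, which is what makes the threaded VKExecutionContext object removable. The aliasing trick with toy types:

    #include <cassert>

    struct Fence { int id; };

    class FenceView {
    public:
        explicit FenceView(Fence* const& fence) : fence{fence} {}
        Fence* operator->() const noexcept { return fence; }
    private:
        Fence* const& fence; // aliases the owner's pointer, not its value
    };

    int main() {
        Fence a{1}, b{2};
        Fence* current = &a;       // the "scheduler's" current fence
        const FenceView view{current};
        assert(view->id == 1);
        current = &b;              // a new context is allocated...
        assert(view->id == 2);     // ...and the view sees it unchanged
    }

One caveat the real class shares: a view must not outlive the pointer it references, or the inner reference dangles.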
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 97ce214b1..a35b45c9c 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -205,10 +205,6 @@ public:
205 } 205 }
206 206
207private: 207private:
208 using OperationDecompilerFn = Id (SPIRVDecompiler::*)(Operation);
209 using OperationDecompilersArray =
210 std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
211
212 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount); 208 static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
213 209
214 void AllocateBindings() { 210 void AllocateBindings() {
@@ -430,20 +426,17 @@ private:
430 instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input, 426 instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input,
431 t_in_uint, "instance_index"); 427 t_in_uint, "instance_index");
432 428
433 bool is_point_size_declared = false;
434 bool is_clip_distances_declared = false; 429 bool is_clip_distances_declared = false;
435 for (const auto index : ir.GetOutputAttributes()) { 430 for (const auto index : ir.GetOutputAttributes()) {
436 if (index == Attribute::Index::PointSize) { 431 if (index == Attribute::Index::ClipDistances0123 ||
437 is_point_size_declared = true; 432 index == Attribute::Index::ClipDistances4567) {
438 } else if (index == Attribute::Index::ClipDistances0123 ||
439 index == Attribute::Index::ClipDistances4567) {
440 is_clip_distances_declared = true; 433 is_clip_distances_declared = true;
441 } 434 }
442 } 435 }
443 436
444 std::vector<Id> members; 437 std::vector<Id> members;
445 members.push_back(t_float4); 438 members.push_back(t_float4);
446 if (is_point_size_declared) { 439 if (ir.UsesPointSize()) {
447 members.push_back(t_float); 440 members.push_back(t_float);
448 } 441 }
449 if (is_clip_distances_declared) { 442 if (is_clip_distances_declared) {
@@ -466,7 +459,7 @@ private:
466 459
467 position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true); 460 position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true);
468 point_size_index = 461 point_size_index =
469 MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", is_point_size_declared); 462 MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", ir.UsesPointSize());
470 clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances", 463 clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances",
471 is_clip_distances_declared); 464 is_clip_distances_declared);
472 465
@@ -712,7 +705,8 @@ private:
712 case Attribute::Index::Position: 705 case Attribute::Index::Position:
713 return AccessElement(t_out_float, per_vertex, position_index, 706 return AccessElement(t_out_float, per_vertex, position_index,
714 abuf->GetElement()); 707 abuf->GetElement());
715 case Attribute::Index::PointSize: 708 case Attribute::Index::LayerViewportPointSize:
709 UNIMPLEMENTED_IF(abuf->GetElement() != 3);
716 return AccessElement(t_out_float, per_vertex, point_size_index); 710 return AccessElement(t_out_float, per_vertex, point_size_index);
717 case Attribute::Index::ClipDistances0123: 711 case Attribute::Index::ClipDistances0123:
718 return AccessElement(t_out_float, per_vertex, clip_distances_index, 712 return AccessElement(t_out_float, per_vertex, clip_distances_index,
@@ -741,6 +735,16 @@ private:
741 return {}; 735 return {};
742 } 736 }
743 737
738 Id FCastHalf0(Operation operation) {
739 UNIMPLEMENTED();
740 return {};
741 }
742
743 Id FCastHalf1(Operation operation) {
744 UNIMPLEMENTED();
745 return {};
746 }
747
744 Id HNegate(Operation operation) { 748 Id HNegate(Operation operation) {
745 UNIMPLEMENTED(); 749 UNIMPLEMENTED();
746 return {}; 750 return {};
@@ -751,6 +755,11 @@ private:
751 return {}; 755 return {};
752 } 756 }
753 757
758 Id HCastFloat(Operation operation) {
759 UNIMPLEMENTED();
760 return {};
761 }
762
754 Id HUnpack(Operation operation) { 763 Id HUnpack(Operation operation) {
755 UNIMPLEMENTED(); 764 UNIMPLEMENTED();
756 return {}; 765 return {};
@@ -806,12 +815,7 @@ private:
806 return {}; 815 return {};
807 } 816 }
808 817
809 Id LogicalAll2(Operation operation) { 818 Id LogicalAnd2(Operation operation) {
810 UNIMPLEMENTED();
811 return {};
812 }
813
814 Id LogicalAny2(Operation operation) {
815 UNIMPLEMENTED(); 819 UNIMPLEMENTED();
816 return {}; 820 return {};
817 } 821 }
@@ -949,6 +953,14 @@ private:
949 return {}; 953 return {};
950 } 954 }
951 955
956 Id BranchIndirect(Operation operation) {
957 const Id op_a = VisitOperand<Type::Uint>(operation, 0);
958
959 Emit(OpStore(jmp_to, op_a));
960 BranchingOp([&]() { Emit(OpBranch(continue_label)); });
961 return {};
962 }
963
952 Id PushFlowStack(Operation operation) { 964 Id PushFlowStack(Operation operation) {
953 const auto target = std::get_if<ImmediateNode>(&*operation[0]); 965 const auto target = std::get_if<ImmediateNode>(&*operation[0]);
954 ASSERT(target); 966 ASSERT(target);
@@ -1060,6 +1072,26 @@ private:
1060 return {}; 1072 return {};
1061 } 1073 }
1062 1074
1075 Id BallotThread(Operation) {
1076 UNIMPLEMENTED();
1077 return {};
1078 }
1079
1080 Id VoteAll(Operation) {
1081 UNIMPLEMENTED();
1082 return {};
1083 }
1084
1085 Id VoteAny(Operation) {
1086 UNIMPLEMENTED();
1087 return {};
1088 }
1089
1090 Id VoteEqual(Operation) {
1091 UNIMPLEMENTED();
1092 return {};
1093 }
1094
1063 Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type, 1095 Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type,
1064 const std::string& name) { 1096 const std::string& name) {
1065 const Id id = OpVariable(type, storage); 1097 const Id id = OpVariable(type, storage);
@@ -1200,7 +1232,7 @@ private:
1200 return {}; 1232 return {};
1201 } 1233 }
1202 1234
1203 static constexpr OperationDecompilersArray operation_decompilers = { 1235 static constexpr std::array operation_decompilers = {
1204 &SPIRVDecompiler::Assign, 1236 &SPIRVDecompiler::Assign,
1205 1237
1206 &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float, 1238 &SPIRVDecompiler::Ternary<&Module::OpSelect, Type::Float, Type::Bool, Type::Float,
@@ -1213,6 +1245,8 @@ private:
1213 &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, 1245 &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>,
1214 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, 1246 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>,
1215 &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, 1247 &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>,
1248 &SPIRVDecompiler::FCastHalf0,
1249 &SPIRVDecompiler::FCastHalf1,
1216 &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, 1250 &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>,
1217 &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, 1251 &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>,
1218 &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, 1252 &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>,
@@ -1273,6 +1307,7 @@ private:
1273 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, 1307 &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>,
1274 &SPIRVDecompiler::HNegate, 1308 &SPIRVDecompiler::HNegate,
1275 &SPIRVDecompiler::HClamp, 1309 &SPIRVDecompiler::HClamp,
1310 &SPIRVDecompiler::HCastFloat,
1276 &SPIRVDecompiler::HUnpack, 1311 &SPIRVDecompiler::HUnpack,
1277 &SPIRVDecompiler::HMergeF32, 1312 &SPIRVDecompiler::HMergeF32,
1278 &SPIRVDecompiler::HMergeH0, 1313 &SPIRVDecompiler::HMergeH0,
@@ -1285,8 +1320,7 @@ private:
1285 &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>, 1320 &SPIRVDecompiler::Binary<&Module::OpLogicalNotEqual, Type::Bool>,
1286 &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>, 1321 &SPIRVDecompiler::Unary<&Module::OpLogicalNot, Type::Bool>,
1287 &SPIRVDecompiler::LogicalPick2, 1322 &SPIRVDecompiler::LogicalPick2,
1288 &SPIRVDecompiler::LogicalAll2, 1323 &SPIRVDecompiler::LogicalAnd2,
1289 &SPIRVDecompiler::LogicalAny2,
1290 1324
1291 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>, 1325 &SPIRVDecompiler::Binary<&Module::OpFOrdLessThan, Type::Bool, Type::Float>,
1292 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>, 1326 &SPIRVDecompiler::Binary<&Module::OpFOrdEqual, Type::Bool, Type::Float>,
@@ -1334,6 +1368,7 @@ private:
1334 &SPIRVDecompiler::ImageStore, 1368 &SPIRVDecompiler::ImageStore,
1335 1369
1336 &SPIRVDecompiler::Branch, 1370 &SPIRVDecompiler::Branch,
1371 &SPIRVDecompiler::BranchIndirect,
1337 &SPIRVDecompiler::PushFlowStack, 1372 &SPIRVDecompiler::PushFlowStack,
1338 &SPIRVDecompiler::PopFlowStack, 1373 &SPIRVDecompiler::PopFlowStack,
1339 &SPIRVDecompiler::Exit, 1374 &SPIRVDecompiler::Exit,
@@ -1349,7 +1384,13 @@ private:
1349 &SPIRVDecompiler::WorkGroupId<0>, 1384 &SPIRVDecompiler::WorkGroupId<0>,
1350 &SPIRVDecompiler::WorkGroupId<1>, 1385 &SPIRVDecompiler::WorkGroupId<1>,
1351 &SPIRVDecompiler::WorkGroupId<2>, 1386 &SPIRVDecompiler::WorkGroupId<2>,
1387
1388 &SPIRVDecompiler::BallotThread,
1389 &SPIRVDecompiler::VoteAll,
1390 &SPIRVDecompiler::VoteAny,
1391 &SPIRVDecompiler::VoteEqual,
1352 }; 1392 };
1393 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
1353 1394
1354 const VKDevice& device; 1395 const VKDevice& device;
1355 const ShaderIR& ir; 1396 const ShaderIR& ir;
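The decompiler table drops its hand-written array typedef in favor of class template argument deduction, and the new static_assert pins the table length to OperationCode::Amount, so adding an operation without a matching decompiler entry now fails at compile time instead of misaligning the whole table. The guard in isolation:

    #include <array>
    #include <cstddef>

    enum class OperationCode { Assign, FAdd, Amount };

    struct Decompiler {
        int Assign() { return 0; }
        int FAdd() { return 1; }

        // CTAD deduces std::array<int (Decompiler::*)(), 2>; no typedef needed.
        static constexpr std::array operation_decompilers = {
            &Decompiler::Assign,
            &Decompiler::FAdd,
        };
        static_assert(operation_decompilers.size() ==
                      static_cast<std::size_t>(OperationCode::Amount));
    };

    int main() {}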
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index 58ffa42f2..62f1427f5 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -46,12 +46,12 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Reserve(u64 size) {
46 return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; 46 return {mapped_pointer + offset, offset, invalidation_mark.has_value()};
47} 47}
48 48
49VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) { 49void VKStreamBuffer::Send(u64 size) {
50 ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); 50 ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
51 51
52 if (invalidation_mark) { 52 if (invalidation_mark) {
53 // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. 53 // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish.
54 exctx = scheduler.Flush(); 54 scheduler.Flush();
55 std::for_each(watches.begin(), watches.begin() + *invalidation_mark, 55 std::for_each(watches.begin(), watches.begin() + *invalidation_mark,
56 [&](auto& resource) { resource->Wait(); }); 56 [&](auto& resource) { resource->Wait(); });
57 invalidation_mark = std::nullopt; 57 invalidation_mark = std::nullopt;
@@ -62,11 +62,9 @@ VKExecutionContext VKStreamBuffer::Send(VKExecutionContext exctx, u64 size) {
62 ReserveWatches(WATCHES_RESERVE_CHUNK); 62 ReserveWatches(WATCHES_RESERVE_CHUNK);
63 } 63 }
64 // Add a watch for this allocation. 64 // Add a watch for this allocation.
65 watches[used_watches++]->Watch(exctx.GetFence()); 65 watches[used_watches++]->Watch(scheduler.GetFence());
66 66
67 offset += size; 67 offset += size;
68
69 return exctx;
70} 68}
71 69
72void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { 70void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) {
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 69d036ccd..842e54162 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -37,7 +37,7 @@ public:
37 std::tuple<u8*, u64, bool> Reserve(u64 size); 37 std::tuple<u8*, u64, bool> Reserve(u64 size);
38 38
39 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. 39 /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
40 [[nodiscard]] VKExecutionContext Send(VKExecutionContext exctx, u64 size); 40 void Send(u64 size);
41 41
42 vk::Buffer GetBuffer() const { 42 vk::Buffer GetBuffer() const {
43 return *buffer; 43 return *buffer;
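With the execution context gone, the stream buffer ties each allocation to scheduler.GetFence() directly: Send() records a watch on the fence that was current at submission, and the invalidation path waits on those watches before recycling the mapped memory. A toy model of the watch list:

    #include <cstdint>
    #include <vector>

    struct Fence {
        bool signaled = false;
        void Wait() { signaled = true; } // stand-in for a real GPU wait
    };

    struct StreamBuffer {
        Fence* current_fence = nullptr;   // what scheduler.GetFence() would return
        std::vector<Fence*> watches;

        void Send(std::uint64_t /*size*/) { watches.push_back(current_fence); }

        void Invalidate() {
            for (Fence* fence : watches) {
                fence->Wait(); // block until the GPU is done with the region
            }
            watches.clear();
        }
    };

    int main() {
        Fence frame_fence;
        StreamBuffer buffer;
        buffer.current_fence = &frame_fence;
        buffer.Send(256);
        buffer.Invalidate(); // the mapped memory is safe to reuse afterwards
    }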
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
new file mode 100644
index 000000000..ec3a76690
--- /dev/null
+++ b/src/video_core/shader/control_flow.cpp
@@ -0,0 +1,481 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <list>
6#include <map>
7#include <stack>
8#include <unordered_map>
9#include <unordered_set>
10#include <vector>
11
12#include "common/assert.h"
13#include "common/common_types.h"
14#include "video_core/shader/control_flow.h"
15#include "video_core/shader/shader_ir.h"
16
17namespace VideoCommon::Shader {
18namespace {
19using Tegra::Shader::Instruction;
20using Tegra::Shader::OpCode;
21
22constexpr s32 unassigned_branch = -2;
23
24struct Query {
25 u32 address{};
26 std::stack<u32> ssy_stack{};
27 std::stack<u32> pbk_stack{};
28};
29
30struct BlockStack {
31 BlockStack() = default;
32 explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
33 std::stack<u32> ssy_stack{};
34 std::stack<u32> pbk_stack{};
35};
36
37struct BlockBranchInfo {
38 Condition condition{};
39 s32 address{exit_branch};
40 bool kill{};
41 bool is_sync{};
42 bool is_brk{};
43 bool ignore{};
44};
45
46struct BlockInfo {
47 u32 start{};
48 u32 end{};
49 bool visited{};
50 BlockBranchInfo branch{};
51
52 bool IsInside(const u32 address) const {
53 return start <= address && address <= end;
54 }
55};
56
57struct CFGRebuildState {
58 explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size,
59 const u32 start)
60 : start{start}, program_code{program_code}, program_size{program_size} {}
61
62 u32 start{};
63 std::vector<BlockInfo> block_info{};
64 std::list<u32> inspect_queries{};
65 std::list<Query> queries{};
66 std::unordered_map<u32, u32> registered{};
67 std::unordered_set<u32> labels{};
68 std::map<u32, u32> ssy_labels{};
69 std::map<u32, u32> pbk_labels{};
70 std::unordered_map<u32, BlockStack> stacks{};
71 const ProgramCode& program_code;
72 const std::size_t program_size;
73};
74
75enum class BlockCollision : u32 { None, Found, Inside };
76
77std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) {
78 const auto& blocks = state.block_info;
79 for (u32 index = 0; index < blocks.size(); index++) {
80 if (blocks[index].start == address) {
81 return {BlockCollision::Found, index};
82 }
83 if (blocks[index].IsInside(address)) {
84 return {BlockCollision::Inside, index};
85 }
86 }
87 return {BlockCollision::None, 0xFFFFFFFF};
88}
89
90struct ParseInfo {
91 BlockBranchInfo branch_info{};
92 u32 end_address{};
93};
94
95BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
96 auto& it = state.block_info.emplace_back();
97 it.start = start;
98 it.end = end;
99 const u32 index = static_cast<u32>(state.block_info.size() - 1);
100 state.registered.insert({start, index});
101 return it;
102}
103
104Pred GetPredicate(u32 index, bool negated) {
105 return static_cast<Pred>(index + (negated ? 8 : 0));
106}
107
108/**
109 * Returns whether the instruction at the specified offset is a 'sched' instruction.
110 * Sched instructions always appear before a sequence of 3 instructions.
111 */
112constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
113 constexpr u32 SchedPeriod = 4;
114 u32 absolute_offset = offset - main_offset;
115
116 return (absolute_offset % SchedPeriod) == 0;
117}
118
119enum class ParseResult : u32 {
120 ControlCaught,
121 BlockEnd,
122 AbnormalFlow,
123};
124
125std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
126 u32 offset = static_cast<u32>(address);
127 const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction));
128 ParseInfo parse_info{};
129
130 const auto insert_label = [](CFGRebuildState& state, u32 address) {
131 const auto pair = state.labels.emplace(address);
132 if (pair.second) {
133 state.inspect_queries.push_back(address);
134 }
135 };
136
137 while (true) {
138 if (offset >= end_address) {
139 // ASSERT_OR_EXECUTE can't be used, as it ignores the break
140 ASSERT_MSG(false, "Shader passed the current limit!");
141 parse_info.branch_info.address = exit_branch;
142 parse_info.branch_info.ignore = false;
143 break;
144 }
145 if (state.registered.count(offset) != 0) {
146 parse_info.branch_info.address = offset;
147 parse_info.branch_info.ignore = true;
148 break;
149 }
150 if (IsSchedInstruction(offset, state.start)) {
151 offset++;
152 continue;
153 }
154 const Instruction instr = {state.program_code[offset]};
155 const auto opcode = OpCode::Decode(instr);
156 if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) {
157 offset++;
158 continue;
159 }
160
161 switch (opcode->get().GetId()) {
162 case OpCode::Id::EXIT: {
163 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
164 parse_info.branch_info.condition.predicate =
165 GetPredicate(pred_index, instr.negate_pred != 0);
166 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
167 offset++;
168 continue;
169 }
170 const ConditionCode cc = instr.flow_condition_code;
171 parse_info.branch_info.condition.cc = cc;
172 if (cc == ConditionCode::F) {
173 offset++;
174 continue;
175 }
176 parse_info.branch_info.address = exit_branch;
177 parse_info.branch_info.kill = false;
178 parse_info.branch_info.is_sync = false;
179 parse_info.branch_info.is_brk = false;
180 parse_info.branch_info.ignore = false;
181 parse_info.end_address = offset;
182
183 return {ParseResult::ControlCaught, parse_info};
184 }
185 case OpCode::Id::BRA: {
186 if (instr.bra.constant_buffer != 0) {
187 return {ParseResult::AbnormalFlow, parse_info};
188 }
189 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
190 parse_info.branch_info.condition.predicate =
191 GetPredicate(pred_index, instr.negate_pred != 0);
192 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
193 offset++;
194 continue;
195 }
196 const ConditionCode cc = instr.flow_condition_code;
197 parse_info.branch_info.condition.cc = cc;
198 if (cc == ConditionCode::F) {
199 offset++;
200 continue;
201 }
202 const u32 branch_offset = offset + instr.bra.GetBranchTarget();
203 if (branch_offset == 0) {
204 parse_info.branch_info.address = exit_branch;
205 } else {
206 parse_info.branch_info.address = branch_offset;
207 }
208 insert_label(state, branch_offset);
209 parse_info.branch_info.kill = false;
210 parse_info.branch_info.is_sync = false;
211 parse_info.branch_info.is_brk = false;
212 parse_info.branch_info.ignore = false;
213 parse_info.end_address = offset;
214
215 return {ParseResult::ControlCaught, parse_info};
216 }
217 case OpCode::Id::SYNC: {
218 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
219 parse_info.branch_info.condition.predicate =
220 GetPredicate(pred_index, instr.negate_pred != 0);
221 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
222 offset++;
223 continue;
224 }
225 const ConditionCode cc = instr.flow_condition_code;
226 parse_info.branch_info.condition.cc = cc;
227 if (cc == ConditionCode::F) {
228 offset++;
229 continue;
230 }
231 parse_info.branch_info.address = unassigned_branch;
232 parse_info.branch_info.kill = false;
233 parse_info.branch_info.is_sync = true;
234 parse_info.branch_info.is_brk = false;
235 parse_info.branch_info.ignore = false;
236 parse_info.end_address = offset;
237
238 return {ParseResult::ControlCaught, parse_info};
239 }
240 case OpCode::Id::BRK: {
241 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
242 parse_info.branch_info.condition.predicate =
243 GetPredicate(pred_index, instr.negate_pred != 0);
244 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
245 offset++;
246 continue;
247 }
248 const ConditionCode cc = instr.flow_condition_code;
249 parse_info.branch_info.condition.cc = cc;
250 if (cc == ConditionCode::F) {
251 offset++;
252 continue;
253 }
254 parse_info.branch_info.address = unassigned_branch;
255 parse_info.branch_info.kill = false;
256 parse_info.branch_info.is_sync = false;
257 parse_info.branch_info.is_brk = true;
258 parse_info.branch_info.ignore = false;
259 parse_info.end_address = offset;
260
261 return {ParseResult::ControlCaught, parse_info};
262 }
263 case OpCode::Id::KIL: {
264 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
265 parse_info.branch_info.condition.predicate =
266 GetPredicate(pred_index, instr.negate_pred != 0);
267 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
268 offset++;
269 continue;
270 }
271 const ConditionCode cc = instr.flow_condition_code;
272 parse_info.branch_info.condition.cc = cc;
273 if (cc == ConditionCode::F) {
274 offset++;
275 continue;
276 }
277 parse_info.branch_info.address = exit_branch;
278 parse_info.branch_info.kill = true;
279 parse_info.branch_info.is_sync = false;
280 parse_info.branch_info.is_brk = false;
281 parse_info.branch_info.ignore = false;
282 parse_info.end_address = offset;
283
284 return {ParseResult::ControlCaught, parse_info};
285 }
286 case OpCode::Id::SSY: {
287 const u32 target = offset + instr.bra.GetBranchTarget();
288 insert_label(state, target);
289 state.ssy_labels.emplace(offset, target);
290 break;
291 }
292 case OpCode::Id::PBK: {
293 const u32 target = offset + instr.bra.GetBranchTarget();
294 insert_label(state, target);
295 state.pbk_labels.emplace(offset, target);
296 break;
297 }
298 case OpCode::Id::BRX: {
299 return {ParseResult::AbnormalFlow, parse_info};
300 }
301 default:
302 break;
303 }
304
305 offset++;
306 }
307 parse_info.branch_info.kill = false;
308 parse_info.branch_info.is_sync = false;
309 parse_info.branch_info.is_brk = false;
310 parse_info.end_address = offset - 1;
311 return {ParseResult::BlockEnd, parse_info};
312}
313
314bool TryInspectAddress(CFGRebuildState& state) {
315 if (state.inspect_queries.empty()) {
316 return false;
317 }
318
319 const u32 address = state.inspect_queries.front();
320 state.inspect_queries.pop_front();
321 const auto [result, block_index] = TryGetBlock(state, address);
322 switch (result) {
323 case BlockCollision::Found: {
324 return true;
325 }
326 case BlockCollision::Inside: {
327 // This case is the tricky one:
328 // We need to split the block into 2 separate blocks
329 const u32 end = state.block_info[block_index].end;
330 BlockInfo& new_block = CreateBlockInfo(state, address, end);
331 BlockInfo& current_block = state.block_info[block_index];
332 current_block.end = address - 1;
333 new_block.branch = current_block.branch;
334 BlockBranchInfo forward_branch{};
335 forward_branch.address = address;
336 forward_branch.ignore = true;
337 current_block.branch = forward_branch;
338 return true;
339 }
340 default:
341 break;
342 }
343 const auto [parse_result, parse_info] = ParseCode(state, address);
344 if (parse_result == ParseResult::AbnormalFlow) {
345 // If the flow is abnormal, return false, aborting the CFG reconstruction
346 return false;
347 }
348
349 BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
350 block_info.branch = parse_info.branch_info;
351 if (parse_info.branch_info.condition.IsUnconditional()) {
352 return true;
353 }
354
355 const u32 fallthrough_address = parse_info.end_address + 1;
356 state.inspect_queries.push_front(fallthrough_address);
357 return true;
358}
359
360bool TryQuery(CFGRebuildState& state) {
361 const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels,
362 BlockInfo& block) {
363 auto gather_start = labels.lower_bound(block.start);
364 const auto gather_end = labels.upper_bound(block.end);
365 while (gather_start != gather_end) {
366 cc.push(gather_start->second);
367 ++gather_start;
368 }
369 };
370 if (state.queries.empty()) {
371 return false;
372 }
373
374 Query& q = state.queries.front();
375 const u32 block_index = state.registered[q.address];
376 BlockInfo& block = state.block_info[block_index];
377 // If the block has been visited, check that the stacks match. Otherwise, gather
378 // the ssy/pbk labels into the current stack and check whether the branch at the
379 // end of the block consumes a label. Schedule new queries accordingly.
380 if (block.visited) {
381 BlockStack& stack = state.stacks[q.address];
382 const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) &&
383 (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack);
384 state.queries.pop_front();
385 return all_okay;
386 }
387 block.visited = true;
388 state.stacks.insert_or_assign(q.address, BlockStack{q});
389
390 Query q2(q);
391 state.queries.pop_front();
392 gather_labels(q2.ssy_stack, state.ssy_labels, block);
393 gather_labels(q2.pbk_stack, state.pbk_labels, block);
394 if (!block.branch.condition.IsUnconditional()) {
395 q2.address = block.end + 1;
396 state.queries.push_back(q2);
397 }
398
399 Query conditional_query{q2};
400 if (block.branch.is_sync) {
401 if (block.branch.address == unassigned_branch) {
402 block.branch.address = conditional_query.ssy_stack.top();
403 }
404 conditional_query.ssy_stack.pop();
405 }
406 if (block.branch.is_brk) {
407 if (block.branch.address == unassigned_branch) {
408 block.branch.address = conditional_query.pbk_stack.top();
409 }
410 conditional_query.pbk_stack.pop();
411 }
412 conditional_query.address = block.branch.address;
413 state.queries.push_back(std::move(conditional_query));
414 return true;
415}
416} // Anonymous namespace
417
418std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
419 std::size_t program_size, u32 start_address) {
420 CFGRebuildState state{program_code, program_size, start_address};
421
422 // Inspect Code and generate blocks
423 state.labels.clear();
424 state.labels.emplace(start_address);
425 state.inspect_queries.push_back(state.start);
426 while (!state.inspect_queries.empty()) {
427 if (!TryInspectAddress(state)) {
428 return {};
429 }
430 }
431
432 // Decompile Stacks
433 state.queries.push_back(Query{state.start, {}, {}});
434 bool decompiled = true;
435 while (!state.queries.empty()) {
436 if (!TryQuery(state)) {
437 decompiled = false;
438 break;
439 }
440 }
441
442 // Sort and organize results
443 std::sort(state.block_info.begin(), state.block_info.end(),
444 [](const BlockInfo& a, const BlockInfo& b) { return a.start < b.start; });
445 ShaderCharacteristics result_out{};
446 result_out.decompilable = decompiled;
447 result_out.start = start_address;
448 result_out.end = start_address;
449 for (const auto& block : state.block_info) {
450 ShaderBlock new_block{};
451 new_block.start = block.start;
452 new_block.end = block.end;
453 new_block.ignore_branch = block.branch.ignore;
454 if (!new_block.ignore_branch) {
455 new_block.branch.cond = block.branch.condition;
456 new_block.branch.kills = block.branch.kill;
457 new_block.branch.address = block.branch.address;
458 }
459 result_out.end = std::max(result_out.end, block.end);
460 result_out.blocks.push_back(new_block);
461 }
462 if (result_out.decompilable) {
463 result_out.labels = std::move(state.labels);
464 return {std::move(result_out)};
465 }
466
467 // If it's not decompilable, merge the unlabelled blocks together
468 auto back = result_out.blocks.begin();
469 auto next = std::next(back);
470 while (next != result_out.blocks.end()) {
471 if (state.labels.count(next->start) == 0 && next->start == back->end + 1) {
472 back->end = next->end;
473 next = result_out.blocks.erase(next);
474 continue;
475 }
476 back = next;
477 ++next;
478 }
479 return {std::move(result_out)};
480}
481} // namespace VideoCommon::Shader
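
The query pass above resolves SYNC/BRK targets by symbolically replaying the SSY/PBK stacks: each block pushes the labels recorded inside its range, and a sync/brk branch pops the top label to fill in its unassigned address. A minimal self-contained sketch of that mechanism (the two-block program and all names are hypothetical, not yuzu code):

    #include <cassert>
    #include <cstdint>
    #include <map>
    #include <stack>

    // Simplified block: [start, end] plus a branch that may consume an SSY label.
    struct MiniBlock {
        std::uint32_t start;
        std::uint32_t end;
        bool is_sync;             // branch pops the SSY stack, like SYNC
        std::int64_t target = -1; // -1 = unassigned, resolved from the stack
    };

    int main() {
        // An SSY at offset 0 records label 8; the block ending in SYNC consumes it.
        std::map<std::uint32_t, std::uint32_t> ssy_labels{{0, 8}};
        std::stack<std::uint32_t> ssy_stack;

        MiniBlock blocks[] = {{0, 3, false}, {4, 7, true}};
        for (auto& block : blocks) {
            // Gather labels defined inside this block, like gather_labels above.
            auto it = ssy_labels.lower_bound(block.start);
            const auto last = ssy_labels.upper_bound(block.end);
            for (; it != last; ++it) {
                ssy_stack.push(it->second);
            }
            if (block.is_sync) {
                block.target = ssy_stack.top(); // resolve the unassigned branch
                ssy_stack.pop();
            }
        }
        assert(blocks[1].target == 8);
    }
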
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
new file mode 100644
index 000000000..b0a5e4f8c
--- /dev/null
+++ b/src/video_core/shader/control_flow.h
@@ -0,0 +1,79 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <list>
8#include <optional>
9#include <unordered_set>
10
11#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/shader_ir.h"
13
14namespace VideoCommon::Shader {
15
16using Tegra::Shader::ConditionCode;
17using Tegra::Shader::Pred;
18
19constexpr s32 exit_branch = -1;
20
21struct Condition {
22 Pred predicate{Pred::UnusedIndex};
23 ConditionCode cc{ConditionCode::T};
24
25 bool IsUnconditional() const {
26 return predicate == Pred::UnusedIndex && cc == ConditionCode::T;
27 }
28
29 bool operator==(const Condition& other) const {
30 return std::tie(predicate, cc) == std::tie(other.predicate, other.cc);
31 }
32
33 bool operator!=(const Condition& other) const {
34 return !operator==(other);
35 }
36};
37
38struct ShaderBlock {
39 struct Branch {
40 Condition cond{};
41 bool kills{};
42 s32 address{};
43
44 bool operator==(const Branch& b) const {
45 return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address);
46 }
47
48 bool operator!=(const Branch& b) const {
49 return !operator==(b);
50 }
51 };
52
53 u32 start{};
54 u32 end{};
55 bool ignore_branch{};
56 Branch branch{};
57
58 bool operator==(const ShaderBlock& sb) const {
59 return std::tie(start, end, ignore_branch, branch) ==
60 std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch);
61 }
62
63 bool operator!=(const ShaderBlock& sb) const {
64 return !operator==(sb);
65 }
66};
67
68struct ShaderCharacteristics {
69 std::list<ShaderBlock> blocks{};
70 bool decompilable{};
71 u32 start{};
72 u32 end{};
73 std::unordered_set<u32> labels{};
74};
75
76std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
77 std::size_t program_size, u32 start_address);
78
79} // namespace VideoCommon::Shader
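
A short consumer-side sketch of the interface declared above (DumpCharacteristics is a hypothetical helper; it relies only on these declarations):

    #include <cstdio>

    #include "video_core/shader/control_flow.h"

    // Hypothetical helper: summarize the blocks produced by ScanFlow.
    void DumpCharacteristics(const VideoCommon::Shader::ShaderCharacteristics& info) {
        std::printf("decompilable=%d range=[%u, %u]\n", info.decompilable ? 1 : 0, info.start,
                    info.end);
        for (const auto& block : info.blocks) {
            if (block.ignore_branch) {
                continue; // the block falls through; there is no branch to report
            }
            // exit_branch (-1) marks a program exit instead of a jump target.
            const char* const kind = block.branch.address < 0 ? "exit" : "jump";
            const char* const cond = block.branch.cond.IsUnconditional() ? "always" : "conditional";
            std::printf("[%u, %u] %s (%s)%s\n", block.start, block.end, kind, cond,
                        block.branch.kills ? " kills" : "");
        }
    }
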
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 2c9ff28f2..47a9fd961 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -11,6 +11,7 @@
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/engines/shader_bytecode.h" 12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/engines/shader_header.h" 13#include "video_core/engines/shader_header.h"
14#include "video_core/shader/control_flow.h"
14#include "video_core/shader/node_helper.h" 15#include "video_core/shader/node_helper.h"
15#include "video_core/shader/shader_ir.h" 16#include "video_core/shader/shader_ir.h"
16 17
@@ -21,20 +22,6 @@ using Tegra::Shader::OpCode;
21 22
22namespace { 23namespace {
23 24
24/// Merges exit method of two parallel branches.
25constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) {
26 if (a == ExitMethod::Undetermined) {
27 return b;
28 }
29 if (b == ExitMethod::Undetermined) {
30 return a;
31 }
32 if (a == b) {
33 return a;
34 }
35 return ExitMethod::Conditional;
36}
37
38/** 25/**
39 * Returns whether the instruction at the specified offset is a 'sched' instruction. 26 * Returns whether the instruction at the specified offset is a 'sched' instruction.
40 * Sched instructions always appear before a sequence of 3 instructions. 27 * Sched instructions always appear before a sequence of 3 instructions.
@@ -51,85 +38,104 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
51void ShaderIR::Decode() { 38void ShaderIR::Decode() {
52 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); 39 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
53 40
54 std::set<u32> labels; 41 disable_flow_stack = false;
55 const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); 42 const auto info = ScanFlow(program_code, program_size, main_offset);
56 if (exit_method != ExitMethod::AlwaysEnd) { 43 if (info) {
57 UNREACHABLE_MSG("Program does not always end"); 44 const auto& shader_info = *info;
58 } 45 coverage_begin = shader_info.start;
59 46 coverage_end = shader_info.end;
60 if (labels.empty()) { 47 if (shader_info.decompilable) {
61 basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); 48 disable_flow_stack = true;
49 const auto insert_block = [this](NodeBlock& nodes, u32 label) {
50 if (label == static_cast<u32>(exit_branch)) {
51 return;
52 }
53 basic_blocks.insert({label, nodes});
54 };
55 const auto& blocks = shader_info.blocks;
56 NodeBlock current_block;
57 u32 current_label = static_cast<u32>(exit_branch);
58 for (auto& block : blocks) {
59 if (shader_info.labels.count(block.start) != 0) {
60 insert_block(current_block, current_label);
61 current_block.clear();
62 current_label = block.start;
63 }
64 if (!block.ignore_branch) {
65 DecodeRangeInner(current_block, block.start, block.end);
66 InsertControlFlow(current_block, block);
67 } else {
68 DecodeRangeInner(current_block, block.start, block.end + 1);
69 }
70 }
71 insert_block(current_block, current_label);
72 return;
73 }
74 LOG_WARNING(HW_GPU, "Flow stack removal failed! Falling back to the old method");
75 // We can't decompile it; fall back to the standard per-block method
76 for (const auto& block : shader_info.blocks) {
77 basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
78 }
62 return; 79 return;
63 } 80 }
81 LOG_WARNING(HW_GPU, "Flow analysis failed! Falling back to brute force compilation");
82
83 // Now we have to deal with an undecompilable shader. Brute force it by
84 // decoding every possible address as its own block.
85 coverage_begin = main_offset;
86 const u32 shader_end = static_cast<u32>(program_size / sizeof(u64));
87 coverage_end = shader_end;
88 for (u32 label = main_offset; label < shader_end; label++) {
89 basic_blocks.insert({label, DecodeRange(label, label + 1)});
90 }
91}
64 92
65 labels.insert(main_offset); 93NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
66 94 NodeBlock basic_block;
67 for (const u32 label : labels) { 95 DecodeRangeInner(basic_block, begin, end);
68 const auto next_it = labels.lower_bound(label + 1); 96 return basic_block;
69 const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; 97}
70 98
71 basic_blocks.insert({label, DecodeRange(label, next_label)}); 99void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
100 for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
101 pc = DecodeInstr(bb, pc);
72 } 102 }
73} 103}
74 104
75ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) { 105void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
76 const auto [iter, inserted] = 106 const auto apply_conditions = [&](const Condition& cond, Node n) -> Node {
77 exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); 107 Node result = n;
78 ExitMethod& exit_method = iter->second; 108 if (cond.cc != ConditionCode::T) {
79 if (!inserted) 109 result = Conditional(GetConditionCode(cond.cc), {result});
80 return exit_method;
81
82 for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) {
83 coverage_begin = std::min(coverage_begin, offset);
84 coverage_end = std::max(coverage_end, offset + 1);
85
86 const Instruction instr = {program_code[offset]};
87 const auto opcode = OpCode::Decode(instr);
88 if (!opcode)
89 continue;
90 switch (opcode->get().GetId()) {
91 case OpCode::Id::EXIT: {
92 // The EXIT instruction can be predicated, which means that the shader can conditionally
93 // end on this instruction. We have to consider the case where the condition is not met
94 // and check the exit method of that other basic block.
95 using Tegra::Shader::Pred;
96 if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
97 return exit_method = ExitMethod::AlwaysEnd;
98 } else {
99 const ExitMethod not_met = Scan(offset + 1, end, labels);
100 return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
101 }
102 } 110 }
103 case OpCode::Id::BRA: { 111 if (cond.predicate != Pred::UnusedIndex) {
104 const u32 target = offset + instr.bra.GetBranchTarget(); 112 u32 pred = static_cast<u32>(cond.predicate);
105 labels.insert(target); 113 const bool is_neg = pred > 7;
106 const ExitMethod no_jmp = Scan(offset + 1, end, labels); 114 if (is_neg) {
107 const ExitMethod jmp = Scan(target, end, labels); 115 pred -= 8;
108 return exit_method = ParallelExit(no_jmp, jmp); 116 }
109 } 117 result = Conditional(GetPredicate(pred, is_neg), {result});
110 case OpCode::Id::SSY:
111 case OpCode::Id::PBK: {
112 // The SSY and PBK use a similar encoding as the BRA instruction.
113 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
114 "Constant buffer branching is not supported");
115 const u32 target = offset + instr.bra.GetBranchTarget();
116 labels.insert(target);
117 // Continue scanning for an exit method.
118 break;
119 } 118 }
120 default: 119 return result;
121 break; 120 };
121 if (block.branch.address < 0) {
122 if (block.branch.kills) {
123 Node n = Operation(OperationCode::Discard);
124 n = apply_conditions(block.branch.cond, n);
125 bb.push_back(n);
126 global_code.push_back(n);
127 return;
122 } 128 }
129 Node n = Operation(OperationCode::Exit);
130 n = apply_conditions(block.branch.cond, n);
131 bb.push_back(n);
132 global_code.push_back(n);
133 return;
123 } 134 }
124 return exit_method = ExitMethod::AlwaysReturn; 135 Node n = Operation(OperationCode::Branch, Immediate(block.branch.address));
125} 136 n = apply_conditions(block.branch.cond, n);
126 137 bb.push_back(n);
127NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { 138 global_code.push_back(n);
128 NodeBlock basic_block;
129 for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
130 pc = DecodeInstr(basic_block, pc);
131 }
132 return basic_block;
133} 139}
134 140
135u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { 141u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
@@ -140,15 +146,18 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
140 146
141 const Instruction instr = {program_code[pc]}; 147 const Instruction instr = {program_code[pc]};
142 const auto opcode = OpCode::Decode(instr); 148 const auto opcode = OpCode::Decode(instr);
149 const u32 nv_address = ConvertAddressToNvidiaSpace(pc);
143 150
144 // Decoding failure 151 // Decoding failure
145 if (!opcode) { 152 if (!opcode) {
146 UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); 153 UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
154 bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})",
155 nv_address, instr.value)));
147 return pc + 1; 156 return pc + 1;
148 } 157 }
149 158
150 bb.push_back( 159 bb.push_back(Comment(
151 Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value))); 160 fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value)));
152 161
153 using Tegra::Shader::Pred; 162 using Tegra::Shader::Pred;
154 UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, 163 UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
@@ -167,6 +176,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
167 {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, 176 {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
168 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, 177 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
169 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, 178 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
179 {OpCode::Type::Warp, &ShaderIR::DecodeWarp},
170 {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, 180 {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
171 {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, 181 {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
172 {OpCode::Type::Image, &ShaderIR::DecodeImage}, 182 {OpCode::Type::Image, &ShaderIR::DecodeImage},
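
On the decompilable path above, consecutive ShaderBlocks are appended to the same NodeBlock until a labelled start opens a new one. The grouping logic in isolation, with types reduced to plain integers (illustrative only):

    #include <cassert>
    #include <cstdint>
    #include <map>
    #include <set>
    #include <vector>

    int main() {
        // Block start offsets, and the subset of starts that carries a label.
        const std::vector<std::uint32_t> block_starts{0, 4, 8, 12};
        const std::set<std::uint32_t> labels{0, 8};

        // Group consecutive blocks under the last label seen, as Decode() does.
        std::map<std::uint32_t, std::vector<std::uint32_t>> basic_blocks;
        std::uint32_t current_label = 0;
        for (const std::uint32_t start : block_starts) {
            if (labels.count(start) != 0) {
                current_label = start; // a label opens a new basic block
            }
            basic_blocks[current_label].push_back(start);
        }

        assert(basic_blocks.size() == 2);    // two labelled groups: 0 and 8
        assert(basic_blocks[0].size() == 2); // blocks starting at 0 and 4
        assert(basic_blocks[8].size() == 2); // blocks starting at 8 and 12
    }
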
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 87d8fecaa..1473c282a 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -42,11 +42,14 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
42 case OpCode::Id::FMUL_R: 42 case OpCode::Id::FMUL_R:
43 case OpCode::Id::FMUL_IMM: { 43 case OpCode::Id::FMUL_IMM: {
44 // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. 44 // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
45 UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented", 45 if (instr.fmul.tab5cb8_2 != 0) {
46 instr.fmul.tab5cb8_2.Value()); 46 LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
47 UNIMPLEMENTED_IF_MSG( 47 instr.fmul.tab5cb8_2.Value());
48 instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", 48 }
49 instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default 49 if (instr.fmul.tab5c68_0 != 1) {
50 LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
51 instr.fmul.tab5c68_0.Value());
52 }
50 53
51 op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); 54 op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
52 55
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index 7bcf38f23..6466fc011 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -23,7 +23,9 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
23 LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName()); 23 LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
24 } 24 }
25 } else { 25 } else {
26 UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); 26 if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) {
27 LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
28 }
27 } 29 }
28 30
29 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); 31 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a);
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 4221f0c58..32facd6ba 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -14,6 +14,12 @@ using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode; 14using Tegra::Shader::OpCode;
15using Tegra::Shader::Register; 15using Tegra::Shader::Register;
16 16
17namespace {
18constexpr OperationCode GetFloatSelector(u64 selector) {
19 return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1;
20}
21} // Anonymous namespace
22
17u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { 23u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]}; 24 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr); 25 const auto opcode = OpCode::Decode(instr);
@@ -22,7 +28,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
22 case OpCode::Id::I2I_R: 28 case OpCode::Id::I2I_R:
23 case OpCode::Id::I2I_C: 29 case OpCode::Id::I2I_C:
24 case OpCode::Id::I2I_IMM: { 30 case OpCode::Id::I2I_IMM: {
25 UNIMPLEMENTED_IF(instr.conversion.selector); 31 UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
26 UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); 32 UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word);
27 UNIMPLEMENTED_IF(instr.alu.saturate_d); 33 UNIMPLEMENTED_IF(instr.alu.saturate_d);
28 34
@@ -57,8 +63,8 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
57 case OpCode::Id::I2F_R: 63 case OpCode::Id::I2F_R:
58 case OpCode::Id::I2F_C: 64 case OpCode::Id::I2F_C:
59 case OpCode::Id::I2F_IMM: { 65 case OpCode::Id::I2F_IMM: {
60 UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); 66 UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
61 UNIMPLEMENTED_IF(instr.conversion.selector); 67 UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
62 UNIMPLEMENTED_IF_MSG(instr.generates_cc, 68 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
63 "Condition codes generation in I2F is not implemented"); 69 "Condition codes generation in I2F is not implemented");
64 70
@@ -82,14 +88,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
82 value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); 88 value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
83 89
84 SetInternalFlagsFromFloat(bb, value, instr.generates_cc); 90 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
91
92 if (instr.conversion.dst_size == Register::Size::Short) {
93 value = Operation(OperationCode::HCastFloat, PRECISE, value);
94 }
95
85 SetRegister(bb, instr.gpr0, value); 96 SetRegister(bb, instr.gpr0, value);
86 break; 97 break;
87 } 98 }
88 case OpCode::Id::F2F_R: 99 case OpCode::Id::F2F_R:
89 case OpCode::Id::F2F_C: 100 case OpCode::Id::F2F_C:
90 case OpCode::Id::F2F_IMM: { 101 case OpCode::Id::F2F_IMM: {
91 UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word); 102 UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
92 UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word); 103 UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
93 UNIMPLEMENTED_IF_MSG(instr.generates_cc, 104 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
94 "Condition codes generation in F2F is not implemented"); 105 "Condition codes generation in F2F is not implemented");
95 106
@@ -107,6 +118,13 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
107 } 118 }
108 }(); 119 }();
109 120
121 if (instr.conversion.src_size == Register::Size::Short) {
122 value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
123 std::move(value));
124 } else {
125 ASSERT(instr.conversion.float_src.selector == 0);
126 }
127
110 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); 128 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
111 129
112 value = [&]() { 130 value = [&]() {
@@ -124,19 +142,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
124 default: 142 default:
125 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", 143 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
126 static_cast<u32>(instr.conversion.f2f.rounding.Value())); 144 static_cast<u32>(instr.conversion.f2f.rounding.Value()));
127 return Immediate(0); 145 return value;
128 } 146 }
129 }(); 147 }();
130 value = GetSaturatedFloat(value, instr.alu.saturate_d); 148 value = GetSaturatedFloat(value, instr.alu.saturate_d);
131 149
132 SetInternalFlagsFromFloat(bb, value, instr.generates_cc); 150 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
151
152 if (instr.conversion.dst_size == Register::Size::Short) {
153 value = Operation(OperationCode::HCastFloat, PRECISE, value);
154 }
155
133 SetRegister(bb, instr.gpr0, value); 156 SetRegister(bb, instr.gpr0, value);
134 break; 157 break;
135 } 158 }
136 case OpCode::Id::F2I_R: 159 case OpCode::Id::F2I_R:
137 case OpCode::Id::F2I_C: 160 case OpCode::Id::F2I_C:
138 case OpCode::Id::F2I_IMM: { 161 case OpCode::Id::F2I_IMM: {
139 UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); 162 UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
140 UNIMPLEMENTED_IF_MSG(instr.generates_cc, 163 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
141 "Condition codes generation in F2I is not implemented"); 164 "Condition codes generation in F2I is not implemented");
142 Node value = [&]() { 165 Node value = [&]() {
@@ -153,6 +176,13 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
153 } 176 }
154 }(); 177 }();
155 178
179 if (instr.conversion.src_size == Register::Size::Short) {
180 value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
181 std::move(value));
182 } else {
183 ASSERT(instr.conversion.float_src.selector == 0);
184 }
185
156 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); 186 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
157 187
158 value = [&]() { 188 value = [&]() {
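
The new float_src.selector handling selects one of the two half floats packed in a 32-bit source: FCastHalf0 reads the low 16 bits and FCastHalf1 the high 16 bits. A plain bit-layout illustration (no actual half-to-float conversion is performed):

    #include <cstdint>

    // Mirror FCastHalf0 (selector 0, low half) and FCastHalf1 (selector 1, high half).
    constexpr std::uint16_t SelectHalf(std::uint32_t packed, std::uint64_t selector) {
        return static_cast<std::uint16_t>(selector == 0 ? packed & 0xffff : packed >> 16);
    }

    int main() {
        // High half 0x3c00 encodes 1.0 in binary16, low half 0x4000 encodes 2.0.
        constexpr std::uint32_t packed = 0x3c004000;
        static_assert(SelectHalf(packed, 0) == 0x4000, "FCastHalf0 takes the low half");
        static_assert(SelectHalf(packed, 1) == 0x3c00, "FCastHalf1 takes the high half");
    }
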
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index 29be25ca3..ca2f39e8d 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -18,10 +18,12 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
18 const auto opcode = OpCode::Decode(instr); 18 const auto opcode = OpCode::Decode(instr);
19 19
20 UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); 20 UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
21 UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented", 21 if (instr.ffma.tab5980_0 != 1) {
22 instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO 22 LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
23 UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", 23 }
24 instr.ffma.tab5980_1.Value()); 24 if (instr.ffma.tab5980_1 != 0) {
25 LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
26 }
25 27
26 const Node op_a = GetRegister(instr.gpr8); 28 const Node op_a = GetRegister(instr.gpr8);
27 29
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
index f5013e44a..5614e8a0d 100644
--- a/src/video_core/shader/decode/float_set.cpp
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -15,7 +15,6 @@ using Tegra::Shader::OpCode;
15 15
16u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { 16u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]}; 17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19 18
20 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, 19 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
21 instr.fset.neg_a != 0); 20 instr.fset.neg_a != 0);
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
index 2323052b0..200c2c983 100644
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@@ -16,10 +16,9 @@ using Tegra::Shader::Pred;
16 16
17u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { 17u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]}; 18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr);
20 19
21 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, 20 Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
22 instr.fsetp.neg_a != 0); 21 instr.fsetp.neg_a != 0);
23 Node op_b = [&]() { 22 Node op_b = [&]() {
24 if (instr.is_b_imm) { 23 if (instr.is_b_imm) {
25 return GetImmediate19(instr); 24 return GetImmediate19(instr);
@@ -29,12 +28,13 @@ u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
29 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); 28 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
30 } 29 }
31 }(); 30 }();
32 op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false); 31 op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b);
33 32
34 // We can't use the constant predicate as destination. 33 // We can't use the constant predicate as destination.
35 ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); 34 ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
36 35
37 const Node predicate = GetPredicateComparisonFloat(instr.fsetp.cond, op_a, op_b); 36 const Node predicate =
37 GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b));
38 const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); 38 const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
39 39
40 const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); 40 const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index d59d15bd8..afea33e5f 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -18,43 +18,56 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]}; 18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr); 19 const auto opcode = OpCode::Decode(instr);
20 20
21 UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0); 21 DEBUG_ASSERT(instr.hsetp2.ftz == 0);
22 22
23 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); 23 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
24 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); 24 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
25 25
26 Node op_b = [&]() { 26 Tegra::Shader::PredCondition cond{};
27 switch (opcode->get().GetId()) { 27 bool h_and{};
28 case OpCode::Id::HSETP2_R: 28 Node op_b{};
29 return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a, 29 switch (opcode->get().GetId()) {
30 instr.hsetp2.negate_b); 30 case OpCode::Id::HSETP2_C:
31 default: 31 cond = instr.hsetp2.cbuf_and_imm.cond;
32 UNREACHABLE(); 32 h_and = instr.hsetp2.cbuf_and_imm.h_and;
33 return Immediate(0); 33 op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
34 } 34 instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
35 }(); 35 break;
36 op_b = UnpackHalfFloat(op_b, instr.hsetp2.type_b); 36 case OpCode::Id::HSETP2_IMM:
37 37 cond = instr.hsetp2.cbuf_and_imm.cond;
38 // We can't use the constant predicate as destination. 38 h_and = instr.hsetp2.cbuf_and_imm.h_and;
39 ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); 39 op_b = UnpackHalfImmediate(instr, true);
40 40 break;
41 const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); 41 case OpCode::Id::HSETP2_R:
42 cond = instr.hsetp2.reg.cond;
43 h_and = instr.hsetp2.reg.h_and;
44 op_b =
45 UnpackHalfFloat(GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.reg.abs_b,
46 instr.hsetp2.reg.negate_b),
47 instr.hsetp2.reg.type_b);
48 break;
49 default:
50 UNREACHABLE();
51 op_b = Immediate(0);
52 }
42 53
43 const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); 54 const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
44 const OperationCode pair_combiner = 55 const Node combined_pred = GetPredicate(instr.hsetp2.pred3, instr.hsetp2.neg_pred);
45 instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;
46
47 const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, op_a, op_b);
48 const Node first_pred = Operation(pair_combiner, comparison);
49 56
50 // Set the primary predicate to the result of Predicate OP SecondPredicate 57 const auto Write = [&](u64 dest, Node src) {
51 const Node value = Operation(combiner, first_pred, second_pred); 58 SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred));
52 SetPredicate(bb, instr.hsetp2.pred3, value); 59 };
53 60
54 if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { 61 const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
55 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled 62 const u64 first = instr.hsetp2.pred0;
56 const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred); 63 const u64 second = instr.hsetp2.pred39;
57 SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred)); 64 if (h_and) {
65 const Node joined = Operation(OperationCode::LogicalAnd2, comparison);
66 Write(first, joined);
67 Write(second, Operation(OperationCode::LogicalNegate, joined));
68 } else {
69 Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0u)));
70 Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1u)));
58 } 71 }
59 72
60 return pc; 73 return pc;
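
HSETP2 compares both halves of a packed pair, so the comparison yields one boolean per half. With h_and set, the two results are folded into a single value whose negation goes to the second predicate; otherwise each half feeds its own predicate. A bool-pair sketch of that routing (hypothetical types, predicate combiner omitted):

    #include <array>
    #include <cassert>

    struct PredPair {
        bool first;
        bool second;
    };

    // Route a per-half comparison result the way HSETP2 does.
    PredPair Route(std::array<bool, 2> comparison, bool h_and) {
        if (h_and) {
            const bool joined = comparison[0] && comparison[1]; // LogicalAnd2
            return {joined, !joined};
        }
        return {comparison[0], comparison[1]}; // LogicalPick2 of elements 0 and 1
    }

    int main() {
        const PredPair folded = Route({true, false}, true);
        assert(folded.first == false && folded.second == true);

        const PredPair picked = Route({true, false}, false);
        assert(picked.first == true && picked.second == false);
    }
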
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index c3bcf1ae9..5b44cb79c 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -22,9 +22,9 @@ u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
22 const auto opcode = OpCode::Decode(instr); 22 const auto opcode = OpCode::Decode(instr);
23 23
24 if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { 24 if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
25 UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None); 25 DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None);
26 } else { 26 } else {
27 UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None); 27 DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None);
28 } 28 }
29 29
30 constexpr auto identity = HalfType::H0_H1; 30 constexpr auto identity = HalfType::H0_H1;
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 24f022cc0..77151a24b 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -95,12 +95,8 @@ const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::Image
95const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, 95const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg,
96 Tegra::Shader::ImageType type) { 96 Tegra::Shader::ImageType type) {
97 const Node image_register{GetRegister(reg)}; 97 const Node image_register{GetRegister(reg)};
98 const Node base_image{ 98 const auto [base_image, cbuf_index, cbuf_offset]{
99 TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; 99 TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
100 const auto cbuf{std::get_if<CbufNode>(&*base_image)};
101 const auto cbuf_offset_imm{std::get_if<ImmediateNode>(&*cbuf->GetOffset())};
102 const auto cbuf_offset{cbuf_offset_imm->GetValue()};
103 const auto cbuf_index{cbuf->GetIndex()};
104 const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; 100 const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
105 101
106 // If this image has already been used, return the existing mapping. 102 // If this image has already been used, return the existing mapping.
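
Both the bindless image and bindless sampler paths cache their entries under a 64-bit key that packs the constant buffer index into the high word and the byte offset into the low word. The packing in isolation:

    #include <cstdint>

    constexpr std::uint64_t MakeCbufKey(std::uint32_t index, std::uint32_t offset) {
        return (static_cast<std::uint64_t>(index) << 32) | static_cast<std::uint64_t>(offset);
    }

    int main() {
        constexpr std::uint64_t key = MakeCbufKey(2, 0x30);
        static_assert(key == 0x0000000200000030, "index in high word, offset in low word");
        static_assert(static_cast<std::uint32_t>(key >> 32) == 2, "recover the cbuf index");
        static_assert(static_cast<std::uint32_t>(key) == 0x30, "recover the cbuf offset");
    }
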
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
index 46e3d5905..59809bcd8 100644
--- a/src/video_core/shader/decode/integer_set.cpp
+++ b/src/video_core/shader/decode/integer_set.cpp
@@ -14,7 +14,6 @@ using Tegra::Shader::OpCode;
14 14
15u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { 15u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]}; 16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr);
18 17
19 const Node op_a = GetRegister(instr.gpr8); 18 const Node op_a = GetRegister(instr.gpr8);
20 const Node op_b = [&]() { 19 const Node op_b = [&]() {
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
index dd20775d7..25e48fef8 100644
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ b/src/video_core/shader/decode/integer_set_predicate.cpp
@@ -16,7 +16,6 @@ using Tegra::Shader::Pred;
16 16
17u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { 17u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]}; 18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr);
20 19
21 const Node op_a = GetRegister(instr.gpr8); 20 const Node op_a = GetRegister(instr.gpr8);
22 21
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 80fc0ccfc..ed108bea8 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -95,10 +95,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
95 const Node op_b = 95 const Node op_b =
96 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); 96 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
97 97
98 SetTemporal(bb, 0, op_a); 98 SetTemporary(bb, 0, op_a);
99 SetTemporal(bb, 1, op_b); 99 SetTemporary(bb, 1, op_b);
100 SetRegister(bb, instr.gpr0, GetTemporal(0)); 100 SetRegister(bb, instr.gpr0, GetTemporary(0));
101 SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1)); 101 SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1));
102 break; 102 break;
103 } 103 }
104 default: 104 default:
@@ -136,9 +136,9 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
136 } 136 }
137 }(); 137 }();
138 for (u32 i = 0; i < count; ++i) 138 for (u32 i = 0; i < count; ++i)
139 SetTemporal(bb, i, GetLmem(i * 4)); 139 SetTemporary(bb, i, GetLmem(i * 4));
140 for (u32 i = 0; i < count; ++i) 140 for (u32 i = 0; i < count; ++i)
141 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 141 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
142 break; 142 break;
143 } 143 }
144 default: 144 default:
@@ -172,10 +172,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
172 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); 172 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
173 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 173 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
174 174
175 SetTemporal(bb, i, gmem); 175 SetTemporary(bb, i, gmem);
176 } 176 }
177 for (u32 i = 0; i < count; ++i) { 177 for (u32 i = 0; i < count; ++i) {
178 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 178 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
179 } 179 }
180 break; 180 break;
181 } 181 }
@@ -253,11 +253,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
253 TrackAndGetGlobalMemory(bb, instr, true); 253 TrackAndGetGlobalMemory(bb, instr, true);
254 254
255 // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} 255 // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
256 SetTemporal(bb, 0, real_address_base); 256 SetTemporary(bb, 0, real_address_base);
257 257
258 const u32 count = GetUniformTypeElementsCount(type); 258 const u32 count = GetUniformTypeElementsCount(type);
259 for (u32 i = 0; i < count; ++i) { 259 for (u32 i = 0; i < count; ++i) {
260 SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); 260 SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
261 } 261 }
262 for (u32 i = 0; i < count; ++i) { 262 for (u32 i = 0; i < count; ++i) {
263 const Node it_offset = Immediate(i * 4); 263 const Node it_offset = Immediate(i * 4);
@@ -265,7 +265,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
265 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); 265 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
266 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 266 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
267 267
268 bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); 268 bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1)));
269 } 269 }
270 break; 270 break;
271 } 271 }
@@ -297,18 +297,13 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeB
297 const auto addr_register{GetRegister(instr.gmem.gpr)}; 297 const auto addr_register{GetRegister(instr.gmem.gpr)};
298 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; 298 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
299 299
300 const Node base_address{ 300 const auto [base_address, index, offset] =
301 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; 301 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
302 const auto cbuf = std::get_if<CbufNode>(&*base_address); 302 ASSERT(base_address != nullptr);
303 ASSERT(cbuf != nullptr);
304 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset());
305 ASSERT(cbuf_offset_imm != nullptr);
306 const auto cbuf_offset = cbuf_offset_imm->GetValue();
307 303
308 bb.push_back( 304 bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
309 Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
310 305
311 const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; 306 const GlobalMemoryBase descriptor{index, offset};
312 const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); 307 const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
313 auto& usage = entry->second; 308 auto& usage = entry->second;
314 if (is_write) { 309 if (is_write) {
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d46a8ab82..d46e0f823 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -22,6 +22,12 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
22 const auto opcode = OpCode::Decode(instr); 22 const auto opcode = OpCode::Decode(instr);
23 23
24 switch (opcode->get().GetId()) { 24 switch (opcode->get().GetId()) {
25 case OpCode::Id::NOP: {
26 UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T);
27 UNIMPLEMENTED_IF(instr.nop.trigger != 0);
28 // With the previous preconditions, this instruction is a no-operation.
29 break;
30 }
25 case OpCode::Id::EXIT: { 31 case OpCode::Id::EXIT: {
26 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; 32 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
27 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}", 33 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}",
@@ -68,6 +74,13 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
68 case SystemVariable::InvocationInfo: 74 case SystemVariable::InvocationInfo:
69 LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); 75 LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
70 return Immediate(0u); 76 return Immediate(0u);
77 case SystemVariable::Tid: {
78 Node value = Immediate(0);
79 value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdX), 0, 9);
80 value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdY), 16, 9);
81 value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdZ), 26, 5);
82 return value;
83 }
71 case SystemVariable::TidX: 84 case SystemVariable::TidX:
72 return Operation(OperationCode::LocalInvocationIdX); 85 return Operation(OperationCode::LocalInvocationIdX);
73 case SystemVariable::TidY: 86 case SystemVariable::TidY:
@@ -91,11 +104,46 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
91 break; 104 break;
92 } 105 }
93 case OpCode::Id::BRA: { 106 case OpCode::Id::BRA: {
94 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, 107 Node branch;
95 "BRA with constant buffers are not implemented"); 108 if (instr.bra.constant_buffer == 0) {
109 const u32 target = pc + instr.bra.GetBranchTarget();
110 branch = Operation(OperationCode::Branch, Immediate(target));
111 } else {
112 const u32 target = pc + 1;
113 const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
114 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
115 PRECISE, op_a, Immediate(3));
116 const Node operand =
117 Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
118 branch = Operation(OperationCode::BranchIndirect, operand);
119 }
96 120
97 const u32 target = pc + instr.bra.GetBranchTarget(); 121 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
98 const Node branch = Operation(OperationCode::Branch, Immediate(target)); 122 if (cc != Tegra::Shader::ConditionCode::T) {
123 bb.push_back(Conditional(GetConditionCode(cc), {branch}));
124 } else {
125 bb.push_back(branch);
126 }
127 break;
128 }
129 case OpCode::Id::BRX: {
130 Node operand;
131 if (instr.brx.constant_buffer != 0) {
132 const s32 target = pc + 1;
133 const Node index = GetRegister(instr.gpr8);
134 const Node op_a =
135 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
136 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
137 PRECISE, op_a, Immediate(3));
138 operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
139 } else {
140 const s32 target = pc + instr.brx.GetBranchExtend();
141 const Node op_a = GetRegister(instr.gpr8);
142 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
143 PRECISE, op_a, Immediate(3));
144 operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
145 }
146 const Node branch = Operation(OperationCode::BranchIndirect, operand);
99 147
100 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; 148 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
101 if (cc != Tegra::Shader::ConditionCode::T) { 149 if (cc != Tegra::Shader::ConditionCode::T) {
@@ -109,6 +157,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
109 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, 157 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
110 "Constant buffer flow is not supported"); 158 "Constant buffer flow is not supported");
111 159
160 if (disable_flow_stack) {
161 break;
162 }
163
112 // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. 164 // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
113 const u32 target = pc + instr.bra.GetBranchTarget(); 165 const u32 target = pc + instr.bra.GetBranchTarget();
114 bb.push_back( 166 bb.push_back(
@@ -119,6 +171,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
119 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, 171 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
120 "Constant buffer PBK is not supported"); 172 "Constant buffer PBK is not supported");
121 173
174 if (disable_flow_stack) {
175 break;
176 }
177
122 // PBK pushes to a stack the address where BRK will jump to. 178 // PBK pushes to a stack the address where BRK will jump to.
123 const u32 target = pc + instr.bra.GetBranchTarget(); 179 const u32 target = pc + instr.bra.GetBranchTarget();
124 bb.push_back( 180 bb.push_back(
@@ -130,6 +186,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
130 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}", 186 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
131 static_cast<u32>(cc)); 187 static_cast<u32>(cc));
132 188
189 if (disable_flow_stack) {
190 break;
191 }
192
133 // The SYNC opcode jumps to the address previously set by the SSY opcode 193 // The SYNC opcode jumps to the address previously set by the SSY opcode
134 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); 194 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
135 break; 195 break;
@@ -138,6 +198,9 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
138 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; 198 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
139 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}", 199 UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
140 static_cast<u32>(cc)); 200 static_cast<u32>(cc));
201 if (disable_flow_stack) {
202 break;
203 }
141 204
142 // The BRK opcode jumps to the address previously set by the PBK opcode 205 // The BRK opcode jumps to the address previously set by the PBK opcode
143 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); 206 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
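
The new SystemVariable::Tid case packs the three local invocation coordinates into a single register, matching the BitfieldInsert calls above: x in bits [0, 9), y in bits [16, 25) and z in bits [26, 31). A standalone packing check (InsertBits is a hypothetical stand-in for BitfieldInsert):

    #include <cstdint>

    constexpr std::uint32_t InsertBits(std::uint32_t base, std::uint32_t value,
                                       unsigned offset, unsigned count) {
        const std::uint32_t mask = ((1u << count) - 1u) << offset;
        return (base & ~mask) | ((value << offset) & mask);
    }

    constexpr std::uint32_t PackTid(std::uint32_t x, std::uint32_t y, std::uint32_t z) {
        std::uint32_t tid = 0;
        tid = InsertBits(tid, x, 0, 9);  // TidX: bits [0, 9)
        tid = InsertBits(tid, y, 16, 9); // TidY: bits [16, 25)
        tid = InsertBits(tid, z, 26, 5); // TidZ: bits [26, 31)
        return tid;
    }

    static_assert(PackTid(5, 3, 1) == (5u | (3u << 16) | (1u << 26)), "independent fields");
    static_assert((PackTid(511, 0, 0) & 0x1ffu) == 511u, "x occupies nine bits");

    int main() {}
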
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
index febbfeb50..84dbc50fe 100644
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@@ -15,7 +15,6 @@ using Tegra::Shader::OpCode;
15 15
16u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { 16u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]}; 17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19 18
20 UNIMPLEMENTED_IF_MSG(instr.generates_cc, 19 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
21 "Condition codes generation in PSET is not implemented"); 20 "Condition codes generation in PSET is not implemented");
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index cb480be9b..0b934a069 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -181,10 +181,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
181 const Node value = 181 const Node value =
182 Operation(OperationCode::TextureQueryDimensions, meta, 182 Operation(OperationCode::TextureQueryDimensions, meta,
183 GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); 183 GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
184 SetTemporal(bb, indexer++, value); 184 SetTemporary(bb, indexer++, value);
185 } 185 }
186 for (u32 i = 0; i < indexer; ++i) { 186 for (u32 i = 0; i < indexer; ++i) {
187 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 187 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
188 } 188 }
189 break; 189 break;
190 } 190 }
@@ -238,10 +238,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
238 auto params = coords; 238 auto params = coords;
239 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; 239 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
240 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); 240 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
241 SetTemporal(bb, indexer++, value); 241 SetTemporary(bb, indexer++, value);
242 } 242 }
243 for (u32 i = 0; i < indexer; ++i) { 243 for (u32 i = 0; i < indexer; ++i) {
244 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 244 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
245 } 245 }
246 break; 246 break;
247 } 247 }
@@ -269,7 +269,13 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
269 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); 269 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
270 } 270 }
271 271
272 WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); 272 const Node4 components = GetTldsCode(instr, texture_type, is_array);
273
274 if (instr.tlds.fp32_flag) {
275 WriteTexsInstructionFloat(bb, instr, components);
276 } else {
277 WriteTexsInstructionHalfFloat(bb, instr, components);
278 }
273 break; 279 break;
274 } 280 }
275 default: 281 default:
@@ -302,13 +308,9 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
302const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, 308const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type,
303 bool is_array, bool is_shadow) { 309 bool is_array, bool is_shadow) {
304 const Node sampler_register = GetRegister(reg); 310 const Node sampler_register = GetRegister(reg);
305 const Node base_sampler = 311 const auto [base_sampler, cbuf_index, cbuf_offset] =
306 TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); 312 TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
307 const auto cbuf = std::get_if<CbufNode>(&*base_sampler); 313 ASSERT(base_sampler != nullptr);
308 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset());
309 ASSERT(cbuf_offset_imm != nullptr);
310 const auto cbuf_offset = cbuf_offset_imm->GetValue();
311 const auto cbuf_index = cbuf->GetIndex();
312 const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); 314 const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
313 315
314 // If this sampler has already been used, return the existing mapping. 316 // If this sampler has already been used, return the existing mapping.
@@ -334,11 +336,11 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const
334 // Skip disabled components 336 // Skip disabled components
335 continue; 337 continue;
336 } 338 }
337 SetTemporal(bb, dest_elem++, components[elem]); 339 SetTemporary(bb, dest_elem++, components[elem]);
338 } 340 }
339 // After writing values in temporals, move them to the real registers 341 // After writing values in temporals, move them to the real registers
340 for (u32 i = 0; i < dest_elem; ++i) { 342 for (u32 i = 0; i < dest_elem; ++i) {
341 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); 343 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
342 } 344 }
343} 345}
344 346
@@ -351,17 +353,17 @@ void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
351 for (u32 component = 0; component < 4; ++component) { 353 for (u32 component = 0; component < 4; ++component) {
352 if (!instr.texs.IsComponentEnabled(component)) 354 if (!instr.texs.IsComponentEnabled(component))
353 continue; 355 continue;
354 SetTemporal(bb, dest_elem++, components[component]); 356 SetTemporary(bb, dest_elem++, components[component]);
355 } 357 }
356 358
357 for (u32 i = 0; i < dest_elem; ++i) { 359 for (u32 i = 0; i < dest_elem; ++i) {
358 if (i < 2) { 360 if (i < 2) {
359 // Write the first two swizzle components to gpr0 and gpr0+1 361 // Write the first two swizzle components to gpr0 and gpr0+1
360 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); 362 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i));
361 } else { 363 } else {
362 ASSERT(instr.texs.HasTwoDestinations()); 364 ASSERT(instr.texs.HasTwoDestinations());
363 // Write the rest of the swizzle components to gpr28 and gpr28+1 365 // Write the rest of the swizzle components to gpr28 and gpr28+1
364 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); 366 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i));
365 } 367 }
366 } 368 }
367} 369}
@@ -389,11 +391,11 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
389 return; 391 return;
390 } 392 }
391 393
392 SetTemporal(bb, 0, first_value); 394 SetTemporary(bb, 0, first_value);
393 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); 395 SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
394 396
395 SetRegister(bb, instr.gpr0, GetTemporal(0)); 397 SetRegister(bb, instr.gpr0, GetTemporary(0));
396 SetRegister(bb, instr.gpr28, GetTemporal(1)); 398 SetRegister(bb, instr.gpr28, GetTemporary(1));
397} 399}
398 400
399Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, 401Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
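
WriteTexsInstructionFloat spreads up to four enabled components across two register pairs: the first two go to gpr0 and gpr0+1, any remaining ones to gpr28 and gpr28+1. The destination selection in isolation (register numbers are illustrative):

    #include <cassert>
    #include <cstdint>

    // Mimic the TEXS write-back: element i < 2 targets gpr0 + i, else gpr28 + i % 2.
    constexpr std::uint32_t TexsDestRegister(std::uint32_t gpr0, std::uint32_t gpr28,
                                             std::uint32_t element) {
        return (element < 2 ? gpr0 : gpr28) + element % 2;
    }

    int main() {
        assert(TexsDestRegister(4, 20, 0) == 4);  // gpr0
        assert(TexsDestRegister(4, 20, 1) == 5);  // gpr0 + 1
        assert(TexsDestRegister(4, 20, 2) == 20); // gpr28
        assert(TexsDestRegister(4, 20, 3) == 21); // gpr28 + 1
    }
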
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
new file mode 100644
index 000000000..04ca74f46
--- /dev/null
+++ b/src/video_core/shader/decode/warp.cpp
@@ -0,0 +1,55 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16using Tegra::Shader::VoteOperation;
17
18namespace {
19OperationCode GetOperationCode(VoteOperation vote_op) {
20 switch (vote_op) {
21 case VoteOperation::All:
22 return OperationCode::VoteAll;
23 case VoteOperation::Any:
24 return OperationCode::VoteAny;
25 case VoteOperation::Eq:
26 return OperationCode::VoteEqual;
27 default:
28 UNREACHABLE_MSG("Invalid vote operation={}", static_cast<u64>(vote_op));
29 return OperationCode::VoteAll;
30 }
31}
32} // Anonymous namespace
33
34u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
35 const Instruction instr = {program_code[pc]};
36 const auto opcode = OpCode::Decode(instr);
37
38 switch (opcode->get().GetId()) {
39 case OpCode::Id::VOTE: {
40 const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
41 const Node active = Operation(OperationCode::BallotThread, value);
42 const Node vote = Operation(GetOperationCode(instr.vote.operation), value);
43 SetRegister(bb, instr.gpr0, active);
44 SetPredicate(bb, instr.vote.dest_pred, vote);
45 break;
46 }
47 default:
48 UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
49 break;
50 }
51
52 return pc;
53}
54
55} // namespace VideoCommon::Shader
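
DecodeWarp lowers each VOTE to one BallotThread plus one vote reduction. A host-side sketch of the semantics these opcodes model, assuming a fully active 32-thread warp (the helpers are illustrative, not part of the decompiler):

    #include <array>
    #include <cstdint>

    constexpr std::size_t WarpSize = 32;

    // BallotThread: a bitmask of the threads whose predicate holds.
    std::uint32_t Ballot(const std::array<bool, WarpSize>& pred) {
        std::uint32_t mask = 0;
        for (std::size_t i = 0; i < WarpSize; ++i) {
            mask |= static_cast<std::uint32_t>(pred[i]) << i;
        }
        return mask;
    }

    // The three vote reductions over that mask.
    bool VoteAll(std::uint32_t mask)   { return mask == 0xFFFFFFFF; }
    bool VoteAny(std::uint32_t mask)   { return mask != 0; }
    bool VoteEqual(std::uint32_t mask) { return mask == 0 || mask == 0xFFFFFFFF; }
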
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 93dee77d1..206961909 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -73,8 +73,8 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
73 if (is_psl) { 73 if (is_psl) {
74 product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); 74 product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
75 } 75 }
76 SetTemporal(bb, 0, product); 76 SetTemporary(bb, 0, product);
77 product = GetTemporal(0); 77 product = GetTemporary(0);
78 78
79 const Node original_c = op_c; 79 const Node original_c = op_c;
80 const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error 80 const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
@@ -98,13 +98,13 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
98 } 98 }
99 }(); 99 }();
100 100
101 SetTemporal(bb, 1, op_c); 101 SetTemporary(bb, 1, op_c);
102 op_c = GetTemporal(1); 102 op_c = GetTemporary(1);
103 103
104 // TODO(Rodrigo): Use an appropriate sign for this operation 104 // TODO(Rodrigo): Use an appropriate sign for this operation
105 Node sum = Operation(OperationCode::IAdd, product, op_c); 105 Node sum = Operation(OperationCode::IAdd, product, op_c);
106 SetTemporal(bb, 2, sum); 106 SetTemporary(bb, 2, sum);
107 sum = GetTemporal(2); 107 sum = GetTemporary(2);
108 if (is_merge) { 108 if (is_merge) {
109 const Node a = BitfieldExtract(sum, 0, 16); 109 const Node a = BitfieldExtract(sum, 0, 16);
110 const Node b = 110 const Node b =
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 0ac83fcf0..5db9313c4 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -30,6 +30,8 @@ enum class OperationCode {
30 FNegate, /// (MetaArithmetic, float a) -> float 30 FNegate, /// (MetaArithmetic, float a) -> float
31 FAbsolute, /// (MetaArithmetic, float a) -> float 31 FAbsolute, /// (MetaArithmetic, float a) -> float
32 FClamp, /// (MetaArithmetic, float value, float min, float max) -> float 32 FClamp, /// (MetaArithmetic, float value, float min, float max) -> float
33 FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float
34 FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float
33 FMin, /// (MetaArithmetic, float a, float b) -> float 35 FMin, /// (MetaArithmetic, float a, float b) -> float
34 FMax, /// (MetaArithmetic, float a, float b) -> float 36 FMax, /// (MetaArithmetic, float a, float b) -> float
35 FCos, /// (MetaArithmetic, float a) -> float 37 FCos, /// (MetaArithmetic, float a) -> float
@@ -83,17 +85,18 @@ enum class OperationCode {
83 UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int count) -> uint 85 UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int count) -> uint
84 UBitCount, /// (MetaArithmetic, uint) -> uint 86 UBitCount, /// (MetaArithmetic, uint) -> uint
85 87
86 HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 88 HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
87 HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 89 HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
88 HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 90 HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
89 HAbsolute, /// (f16vec2 a) -> f16vec2 91 HAbsolute, /// (f16vec2 a) -> f16vec2
90 HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 92 HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2
91 HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 93 HClamp, /// (f16vec2 src, float min, float max) -> f16vec2
92 HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 94 HCastFloat, /// (MetaArithmetic, float a) -> f16vec2
93 HMergeF32, /// (f16vec2 src) -> float 95 HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2
94 HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 96 HMergeF32, /// (f16vec2 src) -> float
95 HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 97 HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2
96 HPack2, /// (float a, float b) -> f16vec2 98 HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2
99 HPack2, /// (float a, float b) -> f16vec2
97 100
98 LogicalAssign, /// (bool& dst, bool src) -> void 101 LogicalAssign, /// (bool& dst, bool src) -> void
99 LogicalAnd, /// (bool a, bool b) -> bool 102 LogicalAnd, /// (bool a, bool b) -> bool
@@ -101,8 +104,7 @@ enum class OperationCode {
101 LogicalXor, /// (bool a, bool b) -> bool 104 LogicalXor, /// (bool a, bool b) -> bool
102 LogicalNegate, /// (bool a) -> bool 105 LogicalNegate, /// (bool a) -> bool
103 LogicalPick2, /// (bool2 pair, uint index) -> bool 106 LogicalPick2, /// (bool2 pair, uint index) -> bool
104 LogicalAll2, /// (bool2 a) -> bool 107 LogicalAnd2, /// (bool2 a) -> bool
105 LogicalAny2, /// (bool2 a) -> bool
106 108
107 LogicalFLessThan, /// (float a, float b) -> bool 109 LogicalFLessThan, /// (float a, float b) -> bool
108 LogicalFEqual, /// (float a, float b) -> bool 110 LogicalFEqual, /// (float a, float b) -> bool
@@ -148,11 +150,12 @@ enum class OperationCode {
148 150
149 ImageStore, /// (MetaImage, float[N] coords) -> void 151 ImageStore, /// (MetaImage, float[N] coords) -> void
150 152
151 Branch, /// (uint branch_target) -> void 153 Branch, /// (uint branch_target) -> void
152 PushFlowStack, /// (uint branch_target) -> void 154 BranchIndirect, /// (uint branch_target) -> void
153 PopFlowStack, /// () -> void 155 PushFlowStack, /// (uint branch_target) -> void
154 Exit, /// () -> void 156 PopFlowStack, /// () -> void
155 Discard, /// () -> void 157 Exit, /// () -> void
158 Discard, /// () -> void
156 159
157 EmitVertex, /// () -> void 160 EmitVertex, /// () -> void
158 EndPrimitive, /// () -> void 161 EndPrimitive, /// () -> void
@@ -165,6 +168,11 @@ enum class OperationCode {
165 WorkGroupIdY, /// () -> uint 168 WorkGroupIdY, /// () -> uint
166 WorkGroupIdZ, /// () -> uint 169 WorkGroupIdZ, /// () -> uint
167 170
171 BallotThread, /// (bool) -> uint
172 VoteAll, /// (bool) -> bool
173 VoteAny, /// (bool) -> bool
174 VoteEqual, /// (bool) -> bool
175
168 Amount, 176 Amount,
169}; 177};
170 178
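
FCastHalf0/FCastHalf1 select one component of a packed half pair and widen it to float; HCastFloat and HPack2 go the other way. A bit-level sketch assuming the pair sits low-half-first in a 32-bit word (the fp16-to-fp32 numeric conversion is left abstract):

    #include <cstdint>

    // Components of a hypothetical packed f16vec2.
    std::uint16_t HalfBits0(std::uint32_t packed) { return packed & 0xFFFF; }
    std::uint16_t HalfBits1(std::uint32_t packed) { return packed >> 16; }

    // Inverse of the extractors; corresponds to HPack2's layout.
    std::uint32_t Pack2(std::uint16_t lo, std::uint16_t hi) {
        return static_cast<std::uint32_t>(lo) |
               (static_cast<std::uint32_t>(hi) << 16);
    }
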
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp
index 6fccbbba3..b3dcd291c 100644
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@@ -12,7 +12,7 @@
12namespace VideoCommon::Shader { 12namespace VideoCommon::Shader {
13 13
14Node Conditional(Node condition, std::vector<Node> code) { 14Node Conditional(Node condition, std::vector<Node> code) {
15 return MakeNode<ConditionalNode>(condition, std::move(code)); 15 return MakeNode<ConditionalNode>(std::move(condition), std::move(code));
16} 16}
17 17
18Node Comment(std::string text) { 18Node Comment(std::string text) {
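
Node is a shared-pointer alias, so the std::move sweep in this commit replaces atomic reference-count churn with plain pointer moves. A small sketch of the pattern, assuming the alias from node.h:

    #include <memory>
    #include <utility>
    #include <vector>

    struct NodeData;                        // opaque for this sketch
    using Node = std::shared_ptr<NodeData>; // assumed alias, as in node.h

    // Take by value, move into place: an rvalue argument costs no refcount
    // traffic at all; an lvalue costs exactly one increment at the call site.
    void StoreNode(std::vector<Node>& block, Node node) {
        block.push_back(std::move(node));
    }
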
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 11b545cca..1e5c7f660 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -22,8 +22,8 @@ using Tegra::Shader::PredCondition;
22using Tegra::Shader::PredOperation; 22using Tegra::Shader::PredOperation;
23using Tegra::Shader::Register; 23using Tegra::Shader::Register;
24 24
25ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset) 25ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size)
26 : program_code{program_code}, main_offset{main_offset} { 26 : program_code{program_code}, main_offset{main_offset}, program_size{size} {
27 Decode(); 27 Decode();
28} 28}
29 29
@@ -61,8 +61,17 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
61 const auto [entry, is_new] = used_cbufs.try_emplace(index); 61 const auto [entry, is_new] = used_cbufs.try_emplace(index);
62 entry->second.MarkAsUsedIndirect(); 62 entry->second.MarkAsUsedIndirect();
63 63
64 const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset)); 64 Node final_offset = [&] {
65 return MakeNode<CbufNode>(index, final_offset); 65 // Attempt to inline constant buffer without a variable offset. This is done to allow
66 // tracking LDC calls.
67 if (const auto gpr = std::get_if<GprNode>(&*node)) {
68 if (gpr->GetIndex() == Register::ZeroIndex) {
69 return Immediate(offset);
70 }
71 }
72 return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset));
73 }();
74 return MakeNode<CbufNode>(index, std::move(final_offset));
66} 75}
67 76
68Node ShaderIR::GetPredicate(u64 pred_, bool negated) { 77Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
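
The inlining above relies on RZ always reading as zero: UAdd(RZ, imm) is just imm, and keeping the offset an ImmediateNode is what lets TrackCbuf follow LDC accesses later. A sketch of the folding rule reusing the IR's own helpers:

    // Fold RZ + imm to imm so the CbufNode offset stays immediate.
    Node FoldCbufOffset(Node reg, u32 imm) {
        if (const auto gpr = std::get_if<GprNode>(&*reg);
            gpr && gpr->GetIndex() == Register::ZeroIndex) {
            return Immediate(imm); // offset fully known at decode time
        }
        return Operation(OperationCode::UAdd, NO_PRECISE, std::move(reg),
                         Immediate(imm));
    }
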
@@ -80,7 +89,7 @@ Node ShaderIR::GetPredicate(bool immediate) {
80 89
81Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { 90Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
82 used_input_attributes.emplace(index); 91 used_input_attributes.emplace(index);
83 return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); 92 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
84} 93}
85 94
86Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { 95Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
@@ -89,6 +98,22 @@ Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_addres
89} 98}
90 99
91Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { 100Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
101 if (index == Attribute::Index::LayerViewportPointSize) {
102 switch (element) {
103 case 0:
104 UNIMPLEMENTED();
105 break;
106 case 1:
107 uses_layer = true;
108 break;
109 case 2:
110 uses_viewport_index = true;
111 break;
112 case 3:
113 uses_point_size = true;
114 break;
115 }
116 }
92 if (index == Attribute::Index::ClipDistances0123 || 117 if (index == Attribute::Index::ClipDistances0123 ||
93 index == Attribute::Index::ClipDistances4567) { 118 index == Attribute::Index::ClipDistances4567) {
94 const auto clip_index = 119 const auto clip_index =
@@ -97,7 +122,7 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
97 } 122 }
98 used_output_attributes.insert(index); 123 used_output_attributes.insert(index);
99 124
100 return MakeNode<AbufNode>(index, static_cast<u32>(element), buffer); 125 return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
101} 126}
102 127
103Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) { 128Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
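
The switch above decodes the packed LayerViewportPointSize attribute: elements 1-3 set the usage flags exposed through the new UsesLayer/UsesViewportIndex/UsesPointSize getters, while element 0 stays unimplemented. The layout as a sketch (the GL built-ins are an assumption about how a backend would consume the flags):

    enum class LvpsElement : unsigned {
        Unknown = 0,       // UNIMPLEMENTED() above
        Layer = 1,         // uses_layer          -> e.g. gl_Layer
        ViewportIndex = 2, // uses_viewport_index -> e.g. gl_ViewportIndex
        PointSize = 3,     // uses_point_size     -> e.g. gl_PointSize
    };
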
@@ -109,19 +134,19 @@ Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
109} 134}
110 135
111Node ShaderIR::GetLocalMemory(Node address) { 136Node ShaderIR::GetLocalMemory(Node address) {
112 return MakeNode<LmemNode>(address); 137 return MakeNode<LmemNode>(std::move(address));
113} 138}
114 139
115Node ShaderIR::GetTemporal(u32 id) { 140Node ShaderIR::GetTemporary(u32 id) {
116 return GetRegister(Register::ZeroIndex + 1 + id); 141 return GetRegister(Register::ZeroIndex + 1 + id);
117} 142}
118 143
119Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { 144Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
120 if (absolute) { 145 if (absolute) {
121 value = Operation(OperationCode::FAbsolute, NO_PRECISE, value); 146 value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value));
122 } 147 }
123 if (negate) { 148 if (negate) {
124 value = Operation(OperationCode::FNegate, NO_PRECISE, value); 149 value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value));
125 } 150 }
126 return value; 151 return value;
127} 152}
@@ -130,24 +155,26 @@ Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
130 if (!saturate) { 155 if (!saturate) {
131 return value; 156 return value;
132 } 157 }
133 const Node positive_zero = Immediate(std::copysignf(0, 1)); 158
134 const Node positive_one = Immediate(1.0f); 159 Node positive_zero = Immediate(std::copysignf(0, 1));
135 return Operation(OperationCode::FClamp, NO_PRECISE, value, positive_zero, positive_one); 160 Node positive_one = Immediate(1.0f);
161 return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
162 std::move(positive_one));
136} 163}
137 164
138Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) { 165Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) {
139 switch (size) { 166 switch (size) {
140 case Register::Size::Byte: 167 case Register::Size::Byte:
141 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, 168 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
142 Immediate(24)); 169 std::move(value), Immediate(24));
143 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, 170 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
144 Immediate(24)); 171 std::move(value), Immediate(24));
145 return value; 172 return value;
146 case Register::Size::Short: 173 case Register::Size::Short:
147 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, 174 value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
148 Immediate(16)); 175 std::move(value), Immediate(16));
149 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, 176 value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
150 Immediate(16)); 177 std::move(value), Immediate(16));
151 case Register::Size::Word: 178 case Register::Size::Word:
152 // Default - do nothing 179 // Default - do nothing
153 return value; 180 return value;
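
The shift pair in ConvertIntegerSize is the classic narrowing idiom: move the value to the top of the word, then shift back arithmetically (signed) or logically (unsigned). A scalar sketch with a worked result:

    #include <cstdint>

    // Narrow to `bits` wide and extend back to 32 bits, mirroring the
    // ILogicalShiftLeft / IArithmeticShiftRight pair above.
    std::uint32_t ConvertSize(std::uint32_t value, unsigned bits, bool is_signed) {
        const unsigned shift = 32u - bits;
        if (is_signed) {
            return static_cast<std::uint32_t>(
                static_cast<std::int32_t>(value << shift) >> shift);
        }
        return (value << shift) >> shift;
    }

    // ConvertSize(0xFF, 8, true)  == 0xFFFFFFFF  (0xFF is -1 as a byte)
    // ConvertSize(0xFF, 8, false) == 0x000000FF
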
@@ -163,27 +190,29 @@ Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, b
163 return value; 190 return value;
164 } 191 }
165 if (absolute) { 192 if (absolute) {
166 value = Operation(OperationCode::IAbsolute, NO_PRECISE, value); 193 value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value));
167 } 194 }
168 if (negate) { 195 if (negate) {
169 value = Operation(OperationCode::INegate, NO_PRECISE, value); 196 value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value));
170 } 197 }
171 return value; 198 return value;
172} 199}
173 200
174Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { 201Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
175 const Node value = Immediate(instr.half_imm.PackImmediates()); 202 Node value = Immediate(instr.half_imm.PackImmediates());
176 if (!has_negation) { 203 if (!has_negation) {
177 return value; 204 return value;
178 } 205 }
179 const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
180 const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
181 206
182 return Operation(OperationCode::HNegate, NO_PRECISE, value, first_negate, second_negate); 207 Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
208 Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);
209
210 return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate),
211 std::move(second_negate));
183} 212}
184 213
185Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { 214Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) {
186 return Operation(OperationCode::HUnpack, type, value); 215 return Operation(OperationCode::HUnpack, type, std::move(value));
187} 216}
188 217
189Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { 218Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
@@ -191,11 +220,11 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
191 case Tegra::Shader::HalfMerge::H0_H1: 220 case Tegra::Shader::HalfMerge::H0_H1:
192 return src; 221 return src;
193 case Tegra::Shader::HalfMerge::F32: 222 case Tegra::Shader::HalfMerge::F32:
194 return Operation(OperationCode::HMergeF32, src); 223 return Operation(OperationCode::HMergeF32, std::move(src));
195 case Tegra::Shader::HalfMerge::Mrg_H0: 224 case Tegra::Shader::HalfMerge::Mrg_H0:
196 return Operation(OperationCode::HMergeH0, dest, src); 225 return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src));
197 case Tegra::Shader::HalfMerge::Mrg_H1: 226 case Tegra::Shader::HalfMerge::Mrg_H1:
198 return Operation(OperationCode::HMergeH1, dest, src); 227 return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src));
199 } 228 }
200 UNREACHABLE(); 229 UNREACHABLE();
201 return src; 230 return src;
@@ -203,10 +232,10 @@ Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
203 232
204Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { 233Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
205 if (absolute) { 234 if (absolute) {
206 value = Operation(OperationCode::HAbsolute, NO_PRECISE, value); 235 value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value));
207 } 236 }
208 if (negate) { 237 if (negate) {
209 value = Operation(OperationCode::HNegate, NO_PRECISE, value, GetPredicate(true), 238 value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true),
210 GetPredicate(true)); 239 GetPredicate(true));
211 } 240 }
212 return value; 241 return value;
@@ -216,9 +245,11 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
216 if (!saturate) { 245 if (!saturate) {
217 return value; 246 return value;
218 } 247 }
219 const Node positive_zero = Immediate(std::copysignf(0, 1)); 248
220 const Node positive_one = Immediate(1.0f); 249 Node positive_zero = Immediate(std::copysignf(0, 1));
221 return Operation(OperationCode::HClamp, NO_PRECISE, value, positive_zero, positive_one); 250 Node positive_one = Immediate(1.0f);
251 return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero),
252 std::move(positive_one));
222} 253}
223 254
224Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { 255Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
@@ -246,7 +277,6 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N
246 condition == PredCondition::LessEqualWithNan || 277 condition == PredCondition::LessEqualWithNan ||
247 condition == PredCondition::GreaterThanWithNan || 278 condition == PredCondition::GreaterThanWithNan ||
248 condition == PredCondition::GreaterEqualWithNan) { 279 condition == PredCondition::GreaterEqualWithNan) {
249
250 predicate = Operation(OperationCode::LogicalOr, predicate, 280 predicate = Operation(OperationCode::LogicalOr, predicate,
251 Operation(OperationCode::LogicalFIsNan, op_a)); 281 Operation(OperationCode::LogicalFIsNan, op_a));
252 predicate = Operation(OperationCode::LogicalOr, predicate, 282 predicate = Operation(OperationCode::LogicalOr, predicate,
@@ -275,7 +305,8 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
275 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), 305 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
276 "Unknown predicate comparison operation"); 306 "Unknown predicate comparison operation");
277 307
278 Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b); 308 Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
309 std::move(op_b));
279 310
280 UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan || 311 UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
281 condition == PredCondition::NotEqualWithNan || 312 condition == PredCondition::NotEqualWithNan ||
@@ -305,9 +336,7 @@ Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition
305 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), 336 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
306 "Unknown predicate comparison operation"); 337 "Unknown predicate comparison operation");
307 338
308 const Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); 339 return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b));
309
310 return predicate;
311} 340}
312 341
313OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { 342OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
@@ -333,31 +362,32 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
333} 362}
334 363
335void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { 364void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {
336 bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src)); 365 bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src)));
337} 366}
338 367
339void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { 368void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {
340 bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src)); 369 bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src)));
341} 370}
342 371
343void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { 372void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {
344 bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value)); 373 bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value)));
345} 374}
346 375
347void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { 376void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
348 bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value)); 377 bb.push_back(
378 Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value)));
349} 379}
350 380
351void ShaderIR::SetTemporal(NodeBlock& bb, u32 id, Node value) { 381void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) {
352 SetRegister(bb, Register::ZeroIndex + 1 + id, value); 382 SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value));
353} 383}
354 384
355void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { 385void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
356 if (!sets_cc) { 386 if (!sets_cc) {
357 return; 387 return;
358 } 388 }
359 const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f)); 389 Node zerop = Operation(OperationCode::LogicalFEqual, std::move(value), Immediate(0.0f));
360 SetInternalFlag(bb, InternalFlag::Zero, zerop); 390 SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
361 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); 391 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
362} 392}
363 393
@@ -365,13 +395,18 @@ void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_
365 if (!sets_cc) { 395 if (!sets_cc) {
366 return; 396 return;
367 } 397 }
368 const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0)); 398 Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0));
369 SetInternalFlag(bb, InternalFlag::Zero, zerop); 399 SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
370 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); 400 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
371} 401}
372 402
373Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { 403Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
374 return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset), 404 return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value),
405 Immediate(offset), Immediate(bits));
406}
407
408Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
409 return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset),
375 Immediate(bits)); 410 Immediate(bits));
376} 411}
377 412
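
BitfieldInsert is the counterpart of BitfieldExtract, declared alongside it in shader_ir.h below: it replaces `bits` bits of `base` starting at `offset` with the low bits of `insert`, matching GLSL's bitfieldInsert. A scalar model:

    #include <cstdint>

    std::uint32_t BitfieldInsertScalar(std::uint32_t base, std::uint32_t insert,
                                       unsigned offset, unsigned bits) {
        const std::uint32_t field = bits >= 32 ? 0xFFFFFFFFu : ((1u << bits) - 1u);
        const std::uint32_t mask = field << offset;
        return (base & ~mask) | ((insert << offset) & mask);
    }

    // BitfieldInsertScalar(0xAAAA0000, 0x5, 16, 4) == 0xAAA50000
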
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index e22548208..bcc9b79b6 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -5,13 +5,10 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <cstring>
9#include <map> 8#include <map>
10#include <optional> 9#include <optional>
11#include <set> 10#include <set>
12#include <string>
13#include <tuple> 11#include <tuple>
14#include <variant>
15#include <vector> 12#include <vector>
16 13
17#include "common/common_types.h" 14#include "common/common_types.h"
@@ -22,18 +19,12 @@
22 19
23namespace VideoCommon::Shader { 20namespace VideoCommon::Shader {
24 21
22struct ShaderBlock;
23
25using ProgramCode = std::vector<u64>; 24using ProgramCode = std::vector<u64>;
26 25
27constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; 26constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
28 27
29/// Describes the behaviour of the code path between a given entry point and a return point.
30enum class ExitMethod {
31 Undetermined, ///< Internal value. Only occurs when analyzing a JMP loop.
32 AlwaysReturn, ///< All code paths reach the return point.
33 Conditional, ///< Code path reaches the return point or an END instruction conditionally.
34 AlwaysEnd, ///< All code paths reach an END instruction.
35};
36
37class ConstBuffer { 28class ConstBuffer {
38public: 29public:
39 explicit ConstBuffer(u32 max_offset, bool is_indirect) 30 explicit ConstBuffer(u32 max_offset, bool is_indirect)
@@ -73,7 +64,7 @@ struct GlobalMemoryUsage {
73 64
74class ShaderIR final { 65class ShaderIR final {
75public: 66public:
76 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset); 67 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size);
77 ~ShaderIR(); 68 ~ShaderIR();
78 69
79 const std::map<u32, NodeBlock>& GetBasicBlocks() const { 70 const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -121,6 +112,18 @@ public:
121 return static_cast<std::size_t>(coverage_end * sizeof(u64)); 112 return static_cast<std::size_t>(coverage_end * sizeof(u64));
122 } 113 }
123 114
115 bool UsesLayer() const {
116 return uses_layer;
117 }
118
119 bool UsesViewportIndex() const {
120 return uses_viewport_index;
121 }
122
123 bool UsesPointSize() const {
124 return uses_point_size;
125 }
126
124 bool HasPhysicalAttributes() const { 127 bool HasPhysicalAttributes() const {
125 return uses_physical_attributes; 128 return uses_physical_attributes;
126 } 129 }
@@ -129,12 +132,20 @@ public:
129 return header; 132 return header;
130 } 133 }
131 134
135 bool IsFlowStackDisabled() const {
136 return disable_flow_stack;
137 }
138
139 u32 ConvertAddressToNvidiaSpace(const u32 address) const {
140 return (address - main_offset) * sizeof(Tegra::Shader::Instruction);
141 }
142
132private: 143private:
133 void Decode(); 144 void Decode();
134 145
135 ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels);
136
137 NodeBlock DecodeRange(u32 begin, u32 end); 146 NodeBlock DecodeRange(u32 begin, u32 end);
147 void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
148 void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block);
138 149
139 /** 150 /**
140 * Decodes a single instruction from Tegra to IR. 151 * Decodes a single instruction from Tegra to IR.
@@ -156,6 +167,7 @@ private:
156 u32 DecodeFfma(NodeBlock& bb, u32 pc); 167 u32 DecodeFfma(NodeBlock& bb, u32 pc);
157 u32 DecodeHfma2(NodeBlock& bb, u32 pc); 168 u32 DecodeHfma2(NodeBlock& bb, u32 pc);
158 u32 DecodeConversion(NodeBlock& bb, u32 pc); 169 u32 DecodeConversion(NodeBlock& bb, u32 pc);
170 u32 DecodeWarp(NodeBlock& bb, u32 pc);
159 u32 DecodeMemory(NodeBlock& bb, u32 pc); 171 u32 DecodeMemory(NodeBlock& bb, u32 pc);
160 u32 DecodeTexture(NodeBlock& bb, u32 pc); 172 u32 DecodeTexture(NodeBlock& bb, u32 pc);
161 u32 DecodeImage(NodeBlock& bb, u32 pc); 173 u32 DecodeImage(NodeBlock& bb, u32 pc);
@@ -196,8 +208,8 @@ private:
196 Node GetInternalFlag(InternalFlag flag, bool negated = false); 208 Node GetInternalFlag(InternalFlag flag, bool negated = false);
197 /// Generates a node representing a local memory address 209 /// Generates a node representing a local memory address
198 Node GetLocalMemory(Node address); 210 Node GetLocalMemory(Node address);
199 /// Generates a temporal; internally it uses a post-RZ register 211 /// Generates a temporary; internally it uses a post-RZ register
200 Node GetTemporal(u32 id); 212 Node GetTemporary(u32 id);
201 213
202 /// Sets a register. src value must be a number-evaluated node. 214 /// Sets a register. src value must be a number-evaluated node.
203 void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); 215 void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
@@ -207,8 +219,8 @@ private:
207 void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); 219 void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
208 /// Sets a local memory address. address and value must be a number-evaluated node 220 /// Sets a local memory address. address and value must be a number-evaluated node
209 void SetLocalMemory(NodeBlock& bb, Node address, Node value); 221 void SetLocalMemory(NodeBlock& bb, Node address, Node value);
210 /// Sets a temporal. Internally it uses a post-RZ register 222 /// Sets a temporary. Internally it uses a post-RZ register
211 void SetTemporal(NodeBlock& bb, u32 id, Node value); 223 void SetTemporary(NodeBlock& bb, u32 id, Node value);
212 224
213 /// Sets internal flags from a float 225 /// Sets internal flags from a float
214 void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); 226 void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
@@ -268,6 +280,9 @@ private:
268 /// Extracts a sequence of bits from a node 280 /// Extracts a sequence of bits from a node
269 Node BitfieldExtract(Node value, u32 offset, u32 bits); 281 Node BitfieldExtract(Node value, u32 offset, u32 bits);
270 282
283 /// Inserts a sequence of bits from a node
284 Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
285
271 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, 286 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
272 const Node4& components); 287 const Node4& components);
273 288
@@ -314,7 +329,7 @@ private:
314 void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, 329 void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
315 Node op_c, Node imm_lut, bool sets_cc); 330 Node op_c, Node imm_lut, bool sets_cc);
316 331
317 Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; 332 std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
318 333
319 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; 334 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
320 335
@@ -326,10 +341,11 @@ private:
326 341
327 const ProgramCode& program_code; 342 const ProgramCode& program_code;
328 const u32 main_offset; 343 const u32 main_offset;
344 const std::size_t program_size;
345 bool disable_flow_stack{};
329 346
330 u32 coverage_begin{}; 347 u32 coverage_begin{};
331 u32 coverage_end{}; 348 u32 coverage_end{};
332 std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
333 349
334 std::map<u32, NodeBlock> basic_blocks; 350 std::map<u32, NodeBlock> basic_blocks;
335 NodeBlock global_code; 351 NodeBlock global_code;
@@ -343,6 +359,9 @@ private:
343 std::set<Image> used_images; 359 std::set<Image> used_images;
344 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; 360 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
345 std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; 361 std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
362 bool uses_layer{};
363 bool uses_viewport_index{};
364 bool uses_point_size{};
346 bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes 365 bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
347 366
348 Tegra::Shader::Header header; 367 Tegra::Shader::Header header;
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index fc957d980..55f5949e4 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -15,56 +15,63 @@ namespace {
15std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, 15std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
16 OperationCode operation_code) { 16 OperationCode operation_code) {
17 for (; cursor >= 0; --cursor) { 17 for (; cursor >= 0; --cursor) {
18 const Node node = code.at(cursor); 18 Node node = code.at(cursor);
19
19 if (const auto operation = std::get_if<OperationNode>(&*node)) { 20 if (const auto operation = std::get_if<OperationNode>(&*node)) {
20 if (operation->GetCode() == operation_code) { 21 if (operation->GetCode() == operation_code) {
21 return {node, cursor}; 22 return {std::move(node), cursor};
22 } 23 }
23 } 24 }
25
24 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { 26 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
25 const auto& conditional_code = conditional->GetCode(); 27 const auto& conditional_code = conditional->GetCode();
26 const auto [found, internal_cursor] = FindOperation( 28 auto [found, internal_cursor] = FindOperation(
27 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); 29 conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
28 if (found) { 30 if (found) {
29 return {found, cursor}; 31 return {std::move(found), cursor};
30 } 32 }
31 } 33 }
32 } 34 }
33 return {}; 35 return {};
34} 36}
35} // namespace 37} // Anonymous namespace
36 38
37Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { 39std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
40 s64 cursor) const {
38 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { 41 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
39 // Cbuf found, but it has to be immediate 42 // Constant buffer found, test if it's an immediate
40 return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; 43 const auto offset = cbuf->GetOffset();
44 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
45 return {tracked, cbuf->GetIndex(), immediate->GetValue()};
46 }
47 return {};
41 } 48 }
42 if (const auto gpr = std::get_if<GprNode>(&*tracked)) { 49 if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
43 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { 50 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
44 return nullptr; 51 return {};
45 } 52 }
46 // Reduce the cursor by one to avoid infinite loops when the instruction sets the same 53 // Reduce the cursor by one to avoid infinite loops when the instruction sets the same
47 // register that it uses as an operand 54 // register that it uses as an operand
48 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); 55 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
49 if (!source) { 56 if (!source) {
50 return nullptr; 57 return {};
51 } 58 }
52 return TrackCbuf(source, code, new_cursor); 59 return TrackCbuf(source, code, new_cursor);
53 } 60 }
54 if (const auto operation = std::get_if<OperationNode>(&*tracked)) { 61 if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
55 for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { 62 for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
56 if (const auto found = TrackCbuf((*operation)[i], code, cursor)) { 63 if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) {
57 // Cbuf found in operand 64 // Cbuf found in operand.
58 return found; 65 return found;
59 } 66 }
60 } 67 }
61 return nullptr; 68 return {};
62 } 69 }
63 if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { 70 if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
64 const auto& conditional_code = conditional->GetCode(); 71 const auto& conditional_code = conditional->GetCode();
65 return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); 72 return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
66 } 73 }
67 return nullptr; 74 return {};
68} 75}
69 76
70std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { 77std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
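
With TrackCbuf returning the node together with its constant buffer index and offset, callers destructure the result instead of re-inspecting the CbufNode, as the texture-decode hunk at the top of this diff already does. A usage sketch:

    const auto [node, index, offset] =
        TrackCbuf(tracked, global_code, static_cast<s64>(global_code.size()));
    if (node) {
        // Tracking succeeded: index/offset identify the cbuf slot, ready to
        // be packed into the 64-bit sampler key shown earlier.
        const u64 key = (static_cast<u64>(index) << 32) | static_cast<u64>(offset);
    }
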
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index c50f6354d..4ceb219be 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -445,11 +445,12 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat
445 switch (format) { 445 switch (format) {
446 case Tegra::FramebufferConfig::PixelFormat::ABGR8: 446 case Tegra::FramebufferConfig::PixelFormat::ABGR8:
447 return PixelFormat::ABGR8U; 447 return PixelFormat::ABGR8U;
448 case Tegra::FramebufferConfig::PixelFormat::RGB565:
449 return PixelFormat::B5G6R5U;
448 case Tegra::FramebufferConfig::PixelFormat::BGRA8: 450 case Tegra::FramebufferConfig::PixelFormat::BGRA8:
449 return PixelFormat::BGRA8; 451 return PixelFormat::BGRA8;
450 default: 452 default:
451 LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); 453 UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format));
452 UNREACHABLE();
453 return PixelFormat::ABGR8U; 454 return PixelFormat::ABGR8U;
454 } 455 }
455} 456}
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 7a0fdb19b..683c49207 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -24,9 +24,8 @@ StagingCache::StagingCache() = default;
24StagingCache::~StagingCache() = default; 24StagingCache::~StagingCache() = default;
25 25
26SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) 26SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params)
27 : params{params}, mipmap_sizes(params.num_levels), 27 : params{params}, host_memory_size{params.GetHostSizeInBytes()}, gpu_addr{gpu_addr},
28 mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{ 28 mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels) {
29 params.GetHostSizeInBytes()} {
30 std::size_t offset = 0; 29 std::size_t offset = 0;
31 for (u32 level = 0; level < params.num_levels; ++level) { 30 for (u32 level = 0; level < params.num_levels; ++level) {
32 const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; 31 const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};
@@ -75,9 +74,12 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs)
75 74
76 // Linear Surface check 75 // Linear Surface check
77 if (!params.is_tiled) { 76 if (!params.is_tiled) {
78 if (std::tie(params.width, params.height, params.pitch) == 77 if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) {
79 std::tie(rhs.width, rhs.height, rhs.pitch)) { 78 if (params.width == rhs.width) {
80 return MatchStructureResult::FullMatch; 79 return MatchStructureResult::FullMatch;
80 } else {
81 return MatchStructureResult::SemiMatch;
82 }
81 } 83 }
82 return MatchStructureResult::None; 84 return MatchStructureResult::None;
83 } 85 }
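
For linear surfaces, MatchesStructure now distinguishes a width mismatch from a full mismatch: equal height and pitch with equal width is a FullMatch, equal height and pitch with differing width is a SemiMatch, anything else is None. A compact sketch of the decision:

    MatchStructureResult MatchLinear(const SurfaceParams& lhs, const SurfaceParams& rhs) {
        if (lhs.height != rhs.height || lhs.pitch != rhs.pitch) {
            return MatchStructureResult::None;
        }
        return lhs.width == rhs.width ? MatchStructureResult::FullMatch
                                      : MatchStructureResult::SemiMatch;
    }
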
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 8ba386a8a..bcce8d863 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -200,8 +200,9 @@ public:
200 modification_tick = tick; 200 modification_tick = tick;
201 } 201 }
202 202
203 void MarkAsRenderTarget(const bool is_target) { 203 void MarkAsRenderTarget(const bool is_target, const u32 index) {
204 this->is_target = is_target; 204 this->is_target = is_target;
205 this->index = index;
205 } 206 }
206 207
207 void MarkAsPicked(const bool is_picked) { 208 void MarkAsPicked(const bool is_picked) {
@@ -221,6 +222,10 @@ public:
221 return is_target; 222 return is_target;
222 } 223 }
223 224
225 u32 GetRenderTarget() const {
226 return index;
227 }
228
224 bool IsRegistered() const { 229 bool IsRegistered() const {
225 return is_registered; 230 return is_registered;
226 } 231 }
@@ -307,10 +312,13 @@ private:
307 return view; 312 return view;
308 } 313 }
309 314
315 static constexpr u32 NO_RT = 0xFFFFFFFF;
316
310 bool is_modified{}; 317 bool is_modified{};
311 bool is_target{}; 318 bool is_target{};
312 bool is_registered{}; 319 bool is_registered{};
313 bool is_picked{}; 320 bool is_picked{};
321 u32 index{NO_RT};
314 u64 modification_tick{}; 322 u64 modification_tick{};
315}; 323};
316 324
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 9c56e2b4f..fd5472451 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -290,12 +290,19 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co
290 290
291std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, 291std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
292 bool uncompressed) const { 292 bool uncompressed) const {
293 const bool tiled{as_host_size ? false : is_tiled};
294 const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; 293 const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
295 const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; 294 const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
296 const u32 depth{is_layered ? 1U : GetMipDepth(level)}; 295 const u32 depth{is_layered ? 1U : GetMipDepth(level)};
297 return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth, 296 if (is_tiled) {
298 GetMipBlockHeight(level), GetMipBlockDepth(level)); 297 return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height,
298 depth, GetMipBlockHeight(level),
299 GetMipBlockDepth(level));
300 } else if (as_host_size || IsBuffer()) {
301 return GetBytesPerPixel() * width * height * depth;
302 } else {
303 // Linear Texture Case
304 return pitch * height * depth;
305 }
299} 306}
300 307
301bool SurfaceParams::operator==(const SurfaceParams& rhs) const { 308bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
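
The rewrite splits GetInnerMipmapMemorySize into three cases: tiled guest data goes through the full swizzle calculation, host-side or buffer data is tightly packed, and linear guest textures honour the row pitch. A condensed sketch, where CalculateTiledSize stands in for Tegra::Texture::CalculateSize:

    std::size_t InnerMipmapSize(bool is_tiled, bool as_host_size, bool is_buffer,
                                std::size_t bpp, std::size_t width,
                                std::size_t height, std::size_t depth,
                                std::size_t pitch) {
        if (is_tiled) {
            // Host copies of tiled data are stored linearly (!as_host_size).
            return CalculateTiledSize(!as_host_size, bpp, width, height, depth);
        }
        if (as_host_size || is_buffer) {
            return bpp * width * height * depth; // tightly packed
        }
        return pitch * height * depth; // guest linear texture
    }
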
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index 358d6757c..e7ef66ee2 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -58,7 +58,6 @@ public:
58 std::size_t GetHostSizeInBytes() const { 58 std::size_t GetHostSizeInBytes() const {
59 std::size_t host_size_in_bytes; 59 std::size_t host_size_in_bytes;
60 if (GetCompressionType() == SurfaceCompression::Converted) { 60 if (GetCompressionType() == SurfaceCompression::Converted) {
61 constexpr std::size_t rgb8_bpp = 4ULL;
62 // ASTC is uncompressed in software, it's emulated as RGBA8 61 // ASTC is uncompressed in software, it's emulated as RGBA8
63 host_size_in_bytes = 0; 62 host_size_in_bytes = 0;
64 for (u32 level = 0; level < num_levels; ++level) { 63 for (u32 level = 0; level < num_levels; ++level) {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c9e72531a..2ec0203d1 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -116,10 +116,10 @@ public:
116 std::lock_guard lock{mutex}; 116 std::lock_guard lock{mutex};
117 auto& maxwell3d = system.GPU().Maxwell3D(); 117 auto& maxwell3d = system.GPU().Maxwell3D();
118 118
119 if (!maxwell3d.dirty_flags.zeta_buffer) { 119 if (!maxwell3d.dirty.depth_buffer) {
120 return depth_buffer.view; 120 return depth_buffer.view;
121 } 121 }
122 maxwell3d.dirty_flags.zeta_buffer = false; 122 maxwell3d.dirty.depth_buffer = false;
123 123
124 const auto& regs{maxwell3d.regs}; 124 const auto& regs{maxwell3d.regs};
125 const auto gpu_addr{regs.zeta.Address()}; 125 const auto gpu_addr{regs.zeta.Address()};
@@ -133,11 +133,11 @@ public:
133 regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; 133 regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
134 auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); 134 auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true);
135 if (depth_buffer.target) 135 if (depth_buffer.target)
136 depth_buffer.target->MarkAsRenderTarget(false); 136 depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
137 depth_buffer.target = surface_view.first; 137 depth_buffer.target = surface_view.first;
138 depth_buffer.view = surface_view.second; 138 depth_buffer.view = surface_view.second;
139 if (depth_buffer.target) 139 if (depth_buffer.target)
140 depth_buffer.target->MarkAsRenderTarget(true); 140 depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
141 return surface_view.second; 141 return surface_view.second;
142 } 142 }
143 143
@@ -145,10 +145,10 @@ public:
145 std::lock_guard lock{mutex}; 145 std::lock_guard lock{mutex};
146 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); 146 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
147 auto& maxwell3d = system.GPU().Maxwell3D(); 147 auto& maxwell3d = system.GPU().Maxwell3D();
148 if (!maxwell3d.dirty_flags.color_buffer[index]) { 148 if (!maxwell3d.dirty.render_target[index]) {
149 return render_targets[index].view; 149 return render_targets[index].view;
150 } 150 }
151 maxwell3d.dirty_flags.color_buffer.reset(index); 151 maxwell3d.dirty.render_target[index] = false;
152 152
153 const auto& regs{maxwell3d.regs}; 153 const auto& regs{maxwell3d.regs};
154 if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || 154 if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
@@ -167,11 +167,11 @@ public:
167 auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), 167 auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
168 preserve_contents, true); 168 preserve_contents, true);
169 if (render_targets[index].target) 169 if (render_targets[index].target)
170 render_targets[index].target->MarkAsRenderTarget(false); 170 render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
171 render_targets[index].target = surface_view.first; 171 render_targets[index].target = surface_view.first;
172 render_targets[index].view = surface_view.second; 172 render_targets[index].view = surface_view.second;
173 if (render_targets[index].target) 173 if (render_targets[index].target)
174 render_targets[index].target->MarkAsRenderTarget(true); 174 render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
175 return surface_view.second; 175 return surface_view.second;
176 } 176 }
177 177
@@ -191,7 +191,7 @@ public:
191 if (depth_buffer.target == nullptr) { 191 if (depth_buffer.target == nullptr) {
192 return; 192 return;
193 } 193 }
194 depth_buffer.target->MarkAsRenderTarget(false); 194 depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
195 depth_buffer.target = nullptr; 195 depth_buffer.target = nullptr;
196 depth_buffer.view = nullptr; 196 depth_buffer.view = nullptr;
197 } 197 }
@@ -200,7 +200,7 @@ public:
200 if (render_targets[index].target == nullptr) { 200 if (render_targets[index].target == nullptr) {
201 return; 201 return;
202 } 202 }
203 render_targets[index].target->MarkAsRenderTarget(false); 203 render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
204 render_targets[index].target = nullptr; 204 render_targets[index].target = nullptr;
205 render_targets[index].view = nullptr; 205 render_targets[index].view = nullptr;
206 } 206 }
@@ -270,6 +270,17 @@ protected:
270 // and reading it from a separate buffer. 270 // and reading it from a separate buffer.
271 virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; 271 virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
272 272
273 void ManageRenderTargetUnregister(TSurface& surface) {
274 auto& maxwell3d = system.GPU().Maxwell3D();
275 const u32 index = surface->GetRenderTarget();
276 if (index == DEPTH_RT) {
277 maxwell3d.dirty.depth_buffer = true;
278 } else {
279 maxwell3d.dirty.render_target[index] = true;
280 }
281 maxwell3d.dirty.render_settings = true;
282 }
283
273 void Register(TSurface surface) { 284 void Register(TSurface surface) {
274 const GPUVAddr gpu_addr = surface->GetGpuAddr(); 285 const GPUVAddr gpu_addr = surface->GetGpuAddr();
275 const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); 286 const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
@@ -294,8 +305,9 @@ protected:
294 if (guard_render_targets && surface->IsProtected()) { 305 if (guard_render_targets && surface->IsProtected()) {
295 return; 306 return;
296 } 307 }
297 const GPUVAddr gpu_addr = surface->GetGpuAddr(); 308 if (!guard_render_targets && surface->IsRenderTarget()) {
298 const CacheAddr cache_ptr = surface->GetCacheAddr(); 309 ManageRenderTargetUnregister(surface);
310 }
299 const std::size_t size = surface->GetSizeInBytes(); 311 const std::size_t size = surface->GetSizeInBytes();
300 const VAddr cpu_addr = surface->GetCpuAddr(); 312 const VAddr cpu_addr = surface->GetCpuAddr();
301 rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); 313 rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
@@ -649,15 +661,6 @@ private:
649 } 661 }
650 return {current_surface, *view}; 662 return {current_surface, *view};
651 } 663 }
652 // The next case is unsafe, so if we are in accurate GPU emulation, just skip it
653 if (Settings::values.use_accurate_gpu_emulation) {
654 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
655 MatchTopologyResult::FullMatch);
656 }
657 // This is the case the texture is a part of the parent.
658 if (current_surface->MatchesSubTexture(params, gpu_addr)) {
659 return RebuildSurface(current_surface, params, is_render);
660 }
661 } else { 664 } else {
662 // If there are many overlaps, odds are they are subtextures of the candidate 665 // If there are many overlaps, odds are they are subtextures of the candidate
663 // surface. We try to construct a new surface based on the candidate parameters, 666 // surface. We try to construct a new surface based on the candidate parameters,
@@ -793,6 +796,9 @@ private:
793 static constexpr u64 registry_page_size{1 << registry_page_bits}; 796 static constexpr u64 registry_page_size{1 << registry_page_bits};
794 std::unordered_map<CacheAddr, std::vector<TSurface>> registry; 797 std::unordered_map<CacheAddr, std::vector<TSurface>> registry;
795 798
799 static constexpr u32 DEPTH_RT = 8;
800 static constexpr u32 NO_RT = 0xFFFFFFFF;
801
796 // The L1 Cache is used for fast texture lookup before checking the overlaps 802 // The L1 Cache is used for fast texture lookup before checking the overlaps
797 // This avoids calculating size and other stuff. 803 // This avoids calculating size and other stuff.
798 std::unordered_map<CacheAddr, TSurface> l1_cache; 804 std::unordered_map<CacheAddr, TSurface> l1_cache;
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 7e8295944..7df5f1452 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -257,19 +257,21 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y,
257 257
258void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 258void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
259 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, 259 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
260 u32 block_height_bit) { 260 u32 block_height_bit, u32 offset_x, u32 offset_y) {
261 const u32 block_height = 1U << block_height_bit; 261 const u32 block_height = 1U << block_height_bit;
262 const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / 262 const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
263 gob_size_x}; 263 gob_size_x};
264 for (u32 line = 0; line < subrect_height; ++line) { 264 for (u32 line = 0; line < subrect_height; ++line) {
265 const u32 dst_y = line + offset_y;
265 const u32 gob_address_y = 266 const u32 gob_address_y =
266 (line / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + 267 (dst_y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
267 ((line % (gob_size_y * block_height)) / gob_size_y) * gob_size; 268 ((dst_y % (gob_size_y * block_height)) / gob_size_y) * gob_size;
268 const auto& table = legacy_swizzle_table[line % gob_size_y]; 269 const auto& table = legacy_swizzle_table[dst_y % gob_size_y];
269 for (u32 x = 0; x < subrect_width; ++x) { 270 for (u32 x = 0; x < subrect_width; ++x) {
271 const u32 dst_x = x + offset_x;
270 const u32 gob_address = 272 const u32 gob_address =
271 gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; 273 gob_address_y + (dst_x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
272 const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; 274 const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % gob_size_x];
273 u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; 275 u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
274 u8* dest_addr = swizzled_data + swizzled_offset; 276 u8* dest_addr = swizzled_data + swizzled_offset;
275 277
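
The new offset_x/offset_y parameters shift the destination coordinates before the GOB address math, so a subrectangle can land anywhere in the tiled surface instead of only at its origin. A worked sketch of the row term, assuming the usual Maxwell GOB geometry (64 bytes wide, 8 rows tall, 512 bytes total):

    constexpr u32 gob_size_y = 8;
    constexpr u32 gob_size = 512;

    u32 GobAddressY(u32 dst_y, u32 block_height, u32 image_width_in_gobs) {
        return (dst_y / (gob_size_y * block_height)) * gob_size * block_height *
                   image_width_in_gobs +
               ((dst_y % (gob_size_y * block_height)) / gob_size_y) * gob_size;
    }

    // With offset_y = 8 and block_height = 2, line 0 of the subrect resolves
    // to dst_y = 8: the second GOB row of the first block (byte offset 512),
    // which line 0 could never reach without the offset.
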
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index eaec9b5a5..f1e3952bc 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -44,7 +44,8 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
 
 /// Copies an untiled subrectangle into a tiled surface.
 void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
-                    u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height);
+                    u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
+                    u32 offset_x, u32 offset_y);
 
 /// Copies a tiled subrectangle into a linear surface.
 void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index e3be018b9..e36bc2c04 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -213,7 +213,7 @@ struct TICEntry {
         if (header_version != TICHeaderVersion::OneDBuffer) {
             return width_minus_1 + 1;
         }
-        return (buffer_high_width_minus_one << 16) | buffer_low_width_minus_one;
+        return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1;
     }
 
     u32 Height() const {
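The fix above addresses an off-by-one: TIC fields store width minus one, and for the OneDBuffer layout that value is split across a low and a high field. The old code recombined the halves but never added the 1 back, so 1D buffer widths were reported one texel short. A worked example using the field names from TICEntry:

```cpp
// A 1D buffer of 70000 texels is encoded as 69999 = 0x1116F, split into halves.
const u32 buffer_low_width_minus_one = 0x116F; // low 16 bits
const u32 buffer_high_width_minus_one = 0x1;   // remaining high bits
const u32 recombined = (buffer_high_width_minus_one << 16) | buffer_low_width_minus_one;
// The old code returned 'recombined' (69999); the fixed code returns the true width.
const u32 width = recombined + 1; // 70000
```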
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index 3dc0e47d0..f051e17b4 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -1,5 +1,6 @@
 set(CMAKE_AUTOMOC ON)
 set(CMAKE_AUTORCC ON)
+set(CMAKE_AUTOUIC ON)
 set(CMAKE_INCLUDE_CURRENT_DIR ON)
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR}/CMakeModules)
 
@@ -7,6 +8,7 @@ add_executable(yuzu
     Info.plist
     about_dialog.cpp
     about_dialog.h
+    aboutdialog.ui
     applets/error.cpp
     applets/error.h
     applets/profile_select.cpp
@@ -17,42 +19,59 @@ add_executable(yuzu
     applets/web_browser.h
     bootmanager.cpp
     bootmanager.h
+    compatdb.ui
     compatibility_list.cpp
     compatibility_list.h
     configuration/config.cpp
     configuration/config.h
+    configuration/configure.ui
     configuration/configure_audio.cpp
     configuration/configure_audio.h
+    configuration/configure_audio.ui
     configuration/configure_debug.cpp
     configuration/configure_debug.h
+    configuration/configure_debug.ui
     configuration/configure_dialog.cpp
     configuration/configure_dialog.h
     configuration/configure_gamelist.cpp
     configuration/configure_gamelist.h
+    configuration/configure_gamelist.ui
     configuration/configure_general.cpp
     configuration/configure_general.h
+    configuration/configure_general.ui
     configuration/configure_graphics.cpp
     configuration/configure_graphics.h
+    configuration/configure_graphics.ui
     configuration/configure_hotkeys.cpp
     configuration/configure_hotkeys.h
+    configuration/configure_hotkeys.ui
     configuration/configure_input.cpp
     configuration/configure_input.h
+    configuration/configure_input.ui
     configuration/configure_input_player.cpp
     configuration/configure_input_player.h
+    configuration/configure_input_player.ui
     configuration/configure_input_simple.cpp
     configuration/configure_input_simple.h
+    configuration/configure_input_simple.ui
     configuration/configure_mouse_advanced.cpp
     configuration/configure_mouse_advanced.h
+    configuration/configure_mouse_advanced.ui
+    configuration/configure_per_general.cpp
+    configuration/configure_per_general.h
+    configuration/configure_per_general.ui
     configuration/configure_profile_manager.cpp
     configuration/configure_profile_manager.h
+    configuration/configure_profile_manager.ui
     configuration/configure_system.cpp
     configuration/configure_system.h
-    configuration/configure_per_general.cpp
-    configuration/configure_per_general.h
+    configuration/configure_system.ui
     configuration/configure_touchscreen_advanced.cpp
     configuration/configure_touchscreen_advanced.h
+    configuration/configure_touchscreen_advanced.ui
     configuration/configure_web.cpp
     configuration/configure_web.h
+    configuration/configure_web.ui
     debugger/graphics/graphics_breakpoint_observer.cpp
     debugger/graphics/graphics_breakpoint_observer.h
     debugger/graphics/graphics_breakpoints.cpp
@@ -72,12 +91,14 @@ add_executable(yuzu
     game_list_worker.h
     loading_screen.cpp
     loading_screen.h
+    loading_screen.ui
     hotkeys.cpp
     hotkeys.h
     main.cpp
     main.h
-    ui_settings.cpp
-    ui_settings.h
+    main.ui
+    uisettings.cpp
+    uisettings.h
     util/limitable_input_dialog.cpp
     util/limitable_input_dialog.h
     util/sequence_dialog/sequence_dialog.cpp
@@ -89,44 +110,18 @@ add_executable(yuzu
     yuzu.rc
 )
 
-set(UIS
-    aboutdialog.ui
-    configuration/configure.ui
-    configuration/configure_audio.ui
-    configuration/configure_debug.ui
-    configuration/configure_gamelist.ui
-    configuration/configure_general.ui
-    configuration/configure_graphics.ui
-    configuration/configure_hotkeys.ui
-    configuration/configure_input.ui
-    configuration/configure_input_player.ui
-    configuration/configure_input_simple.ui
-    configuration/configure_mouse_advanced.ui
-    configuration/configure_per_general.ui
-    configuration/configure_profile_manager.ui
-    configuration/configure_system.ui
-    configuration/configure_touchscreen_advanced.ui
-    configuration/configure_web.ui
-    compatdb.ui
-    loading_screen.ui
-    main.ui
-)
-
 file(GLOB COMPAT_LIST
      ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.qrc
      ${PROJECT_BINARY_DIR}/dist/compatibility_list/compatibility_list.json)
 file(GLOB_RECURSE ICONS ${PROJECT_SOURCE_DIR}/dist/icons/*)
 file(GLOB_RECURSE THEMES ${PROJECT_SOURCE_DIR}/dist/qt_themes/*)
 
-qt5_wrap_ui(UI_HDRS ${UIS})
 
 target_sources(yuzu
     PRIVATE
         ${COMPAT_LIST}
         ${ICONS}
         ${THEMES}
-        ${UI_HDRS}
-        ${UIS}
 )
 
 if (APPLE)
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 73978ff5b..0456248ac 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -11,7 +11,7 @@
 #include "core/hle/service/hid/controllers/npad.h"
 #include "input_common/main.h"
 #include "yuzu/configuration/config.h"
-#include "yuzu/ui_settings.h"
+#include "yuzu/uisettings.h"
 
 Config::Config() {
     // TODO: Don't hardcode the path; let the frontend decide where to put the config files.
@@ -436,8 +436,6 @@ void Config::ReadControlValues() {
 void Config::ReadCoreValues() {
     qt_config->beginGroup(QStringLiteral("Core"));
 
-    Settings::values.cpu_jit_enabled =
-        ReadSetting(QStringLiteral("cpu_jit_enabled"), true).toBool();
     Settings::values.use_multi_core = ReadSetting(QStringLiteral("use_multi_core"), false).toBool();
 
     qt_config->endGroup();
@@ -518,6 +516,7 @@ void Config::ReadPathValues() {
 
     UISettings::values.roms_path = ReadSetting(QStringLiteral("romsPath")).toString();
     UISettings::values.symbols_path = ReadSetting(QStringLiteral("symbolsPath")).toString();
+    UISettings::values.screenshot_path = ReadSetting(QStringLiteral("screenshotPath")).toString();
     UISettings::values.game_directory_path =
         ReadSetting(QStringLiteral("gameListRootDir"), QStringLiteral(".")).toString();
     UISettings::values.game_directory_deepscan =
@@ -831,7 +830,6 @@ void Config::SaveControlValues() {
 void Config::SaveCoreValues() {
     qt_config->beginGroup(QStringLiteral("Core"));
 
-    WriteSetting(QStringLiteral("cpu_jit_enabled"), Settings::values.cpu_jit_enabled, true);
     WriteSetting(QStringLiteral("use_multi_core"), Settings::values.use_multi_core, false);
 
     qt_config->endGroup();
diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp
index 9a13bb797..5b7e03056 100644
--- a/src/yuzu/configuration/configure_debug.cpp
+++ b/src/yuzu/configuration/configure_debug.cpp
@@ -12,13 +12,13 @@
 #include "ui_configure_debug.h"
 #include "yuzu/configuration/configure_debug.h"
 #include "yuzu/debugger/console.h"
-#include "yuzu/ui_settings.h"
+#include "yuzu/uisettings.h"
 
 ConfigureDebug::ConfigureDebug(QWidget* parent) : QWidget(parent), ui(new Ui::ConfigureDebug) {
     ui->setupUi(this);
     SetConfiguration();
 
-    connect(ui->open_log_button, &QPushButton::pressed, []() {
+    connect(ui->open_log_button, &QPushButton::clicked, []() {
         QString path = QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::LogDir));
         QDesktopServices::openUrl(QUrl::fromLocalFile(path));
     });
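The signal swap here (and throughout the configuration dialogs below, where QPushButton::pressed and QPushButton::released connections are replaced the same way) is a behavioral fix: pressed fires on mouse-down and released on mouse-up even if the cursor was dragged off the button to cancel, while clicked fires only on a completed press-and-release over the button and is also emitted for keyboard activation. A standalone Qt sketch of the difference (illustration only, not yuzu code):

```cpp
#include <QApplication>
#include <QDebug>
#include <QPushButton>

int main(int argc, char* argv[]) {
    QApplication app(argc, argv);
    QPushButton button(QStringLiteral("Open log folder"));
    // Fires immediately on mouse-down, even for a click the user then cancels.
    QObject::connect(&button, &QPushButton::pressed, [] { qDebug() << "pressed"; });
    // Fires only on a completed click (or Space/Enter) - the right trigger for actions.
    QObject::connect(&button, &QPushButton::clicked, [] { qDebug() << "clicked"; });
    button.show();
    return app.exec();
}
```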
diff --git a/src/yuzu/configuration/configure_gamelist.cpp b/src/yuzu/configuration/configure_gamelist.cpp
index d1724ba89..daedbc33e 100644
--- a/src/yuzu/configuration/configure_gamelist.cpp
+++ b/src/yuzu/configuration/configure_gamelist.cpp
@@ -9,7 +9,7 @@
 #include "core/settings.h"
 #include "ui_configure_gamelist.h"
 #include "yuzu/configuration/configure_gamelist.h"
-#include "yuzu/ui_settings.h"
+#include "yuzu/uisettings.h"
 
 namespace {
 constexpr std::array default_icon_sizes{
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp
index 7a6e921cd..75fcbfea3 100644
--- a/src/yuzu/configuration/configure_general.cpp
+++ b/src/yuzu/configuration/configure_general.cpp
@@ -6,7 +6,7 @@
 #include "core/settings.h"
 #include "ui_configure_general.h"
 #include "yuzu/configuration/configure_general.h"
-#include "yuzu/ui_settings.h"
+#include "yuzu/uisettings.h"
 
 ConfigureGeneral::ConfigureGeneral(QWidget* parent)
     : QWidget(parent), ui(new Ui::ConfigureGeneral) {
diff --git a/src/yuzu/configuration/configure_input.cpp b/src/yuzu/configuration/configure_input.cpp
index 4dd775aab..7613197f2 100644
--- a/src/yuzu/configuration/configure_input.cpp
+++ b/src/yuzu/configuration/configure_input.cpp
@@ -79,7 +79,7 @@ ConfigureInput::ConfigureInput(QWidget* parent)
     LoadConfiguration();
     UpdateUIEnabled();
 
-    connect(ui->restore_defaults_button, &QPushButton::pressed, this,
+    connect(ui->restore_defaults_button, &QPushButton::clicked, this,
            &ConfigureInput::RestoreDefaults);
 
     for (auto* enabled : players_controller) {
@@ -96,20 +96,20 @@ ConfigureInput::ConfigureInput(QWidget* parent)
            &ConfigureInput::UpdateUIEnabled);
 
     for (std::size_t i = 0; i < players_configure.size(); ++i) {
-        connect(players_configure[i], &QPushButton::pressed, this,
+        connect(players_configure[i], &QPushButton::clicked, this,
                [this, i] { CallConfigureDialog<ConfigureInputPlayer>(*this, i, false); });
     }
 
-    connect(ui->handheld_configure, &QPushButton::pressed, this,
+    connect(ui->handheld_configure, &QPushButton::clicked, this,
            [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 8, false); });
 
-    connect(ui->debug_configure, &QPushButton::pressed, this,
+    connect(ui->debug_configure, &QPushButton::clicked, this,
           [this] { CallConfigureDialog<ConfigureInputPlayer>(*this, 9, true); });
 
-    connect(ui->mouse_advanced, &QPushButton::pressed, this,
+    connect(ui->mouse_advanced, &QPushButton::clicked, this,
           [this] { CallConfigureDialog<ConfigureMouseAdvanced>(*this); });
 
-    connect(ui->touchscreen_advanced, &QPushButton::pressed, this,
+    connect(ui->touchscreen_advanced, &QPushButton::clicked, this,
           [this] { CallConfigureDialog<ConfigureTouchscreenAdvanced>(*this); });
 }
 
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index 916baccc1..7b70f307c 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -244,7 +244,7 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
         }
 
         button->setContextMenuPolicy(Qt::CustomContextMenu);
-        connect(button, &QPushButton::released, [=] {
+        connect(button, &QPushButton::clicked, [=] {
            HandleClick(
                button_map[button_id],
                [=](const Common::ParamPackage& params) { buttons_param[button_id] = params; },
@@ -273,7 +273,7 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
         }
 
         analog_button->setContextMenuPolicy(Qt::CustomContextMenu);
-        connect(analog_button, &QPushButton::released, [=]() {
+        connect(analog_button, &QPushButton::clicked, [=]() {
             HandleClick(analog_map_buttons[analog_id][sub_button_id],
                         [=](const Common::ParamPackage& params) {
                             SetAnalogButton(params, analogs_param[analog_id],
@@ -300,7 +300,7 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
                                 menu_location));
             });
         }
-        connect(analog_map_stick[analog_id], &QPushButton::released, [=] {
+        connect(analog_map_stick[analog_id], &QPushButton::clicked, [=] {
             QMessageBox::information(this, tr("Information"),
                                      tr("After pressing OK, first move your joystick horizontally, "
                                         "and then vertically."));
@@ -311,8 +311,8 @@ ConfigureInputPlayer::ConfigureInputPlayer(QWidget* parent, std::size_t player_i
         });
     }
 
-    connect(ui->buttonClearAll, &QPushButton::released, [this] { ClearAll(); });
-    connect(ui->buttonRestoreDefaults, &QPushButton::released, [this] { RestoreDefaults(); });
+    connect(ui->buttonClearAll, &QPushButton::clicked, [this] { ClearAll(); });
+    connect(ui->buttonRestoreDefaults, &QPushButton::clicked, [this] { RestoreDefaults(); });
 
     timeout_timer->setSingleShot(true);
     connect(timeout_timer.get(), &QTimer::timeout, [this] { SetPollingResult({}, true); });
diff --git a/src/yuzu/configuration/configure_input_simple.cpp b/src/yuzu/configuration/configure_input_simple.cpp
index 864803ea3..ab3a11d30 100644
--- a/src/yuzu/configuration/configure_input_simple.cpp
+++ b/src/yuzu/configuration/configure_input_simple.cpp
@@ -9,7 +9,7 @@
 #include "yuzu/configuration/configure_input.h"
 #include "yuzu/configuration/configure_input_player.h"
 #include "yuzu/configuration/configure_input_simple.h"
-#include "yuzu/ui_settings.h"
+#include "yuzu/uisettings.h"
 
 namespace {
 
@@ -101,7 +101,7 @@ ConfigureInputSimple::ConfigureInputSimple(QWidget* parent)
 
     connect(ui->profile_combobox, QOverload<int>::of(&QComboBox::currentIndexChanged), this,
             &ConfigureInputSimple::OnSelectProfile);
-    connect(ui->profile_configure, &QPushButton::pressed, this, &ConfigureInputSimple::OnConfigure);
+    connect(ui->profile_configure, &QPushButton::clicked, this, &ConfigureInputSimple::OnConfigure);
 
     LoadConfiguration();
 }
diff --git a/src/yuzu/configuration/configure_mouse_advanced.cpp b/src/yuzu/configuration/configure_mouse_advanced.cpp
index b7305e653..0a4abe34f 100644
--- a/src/yuzu/configuration/configure_mouse_advanced.cpp
+++ b/src/yuzu/configuration/configure_mouse_advanced.cpp
@@ -83,7 +83,7 @@ ConfigureMouseAdvanced::ConfigureMouseAdvanced(QWidget* parent)
         }
 
         button->setContextMenuPolicy(Qt::CustomContextMenu);
-        connect(button, &QPushButton::released, [=] {
+        connect(button, &QPushButton::clicked, [=] {
             HandleClick(
                 button_map[button_id],
                 [=](const Common::ParamPackage& params) { buttons_param[button_id] = params; },
@@ -104,8 +104,8 @@ ConfigureMouseAdvanced::ConfigureMouseAdvanced(QWidget* parent)
         });
     }
 
-    connect(ui->buttonClearAll, &QPushButton::released, [this] { ClearAll(); });
-    connect(ui->buttonRestoreDefaults, &QPushButton::released, [this] { RestoreDefaults(); });
+    connect(ui->buttonClearAll, &QPushButton::clicked, [this] { ClearAll(); });
+    connect(ui->buttonRestoreDefaults, &QPushButton::clicked, [this] { RestoreDefaults(); });
 
     timeout_timer->setSingleShot(true);
     connect(timeout_timer.get(), &QTimer::timeout, [this] { SetPollingResult({}, true); });
diff --git a/src/yuzu/configuration/configure_per_general.cpp b/src/yuzu/configuration/configure_per_general.cpp
index 90336e235..d7f259f12 100644
--- a/src/yuzu/configuration/configure_per_general.cpp
+++ b/src/yuzu/configuration/configure_per_general.cpp
@@ -23,7 +23,7 @@
 #include "yuzu/configuration/config.h"
 #include "yuzu/configuration/configure_input.h"
 #include "yuzu/configuration/configure_per_general.h"
-#include "yuzu/ui_settings.h"
+#include "yuzu/uisettings.h"
 #include "yuzu/util/util.h"
 
 ConfigurePerGameGeneral::ConfigurePerGameGeneral(QWidget* parent, u64 title_id)
diff --git a/src/yuzu/configuration/configure_profile_manager.cpp b/src/yuzu/configuration/configure_profile_manager.cpp
index c90f4cdd8..f53423440 100644
--- a/src/yuzu/configuration/configure_profile_manager.cpp
+++ b/src/yuzu/configuration/configure_profile_manager.cpp
@@ -108,10 +108,10 @@ ConfigureProfileManager ::ConfigureProfileManager(QWidget* parent)
 
     connect(tree_view, &QTreeView::clicked, this, &ConfigureProfileManager::SelectUser);
 
-    connect(ui->pm_add, &QPushButton::pressed, this, &ConfigureProfileManager::AddUser);
-    connect(ui->pm_rename, &QPushButton::pressed, this, &ConfigureProfileManager::RenameUser);
-    connect(ui->pm_remove, &QPushButton::pressed, this, &ConfigureProfileManager::DeleteUser);
-    connect(ui->pm_set_image, &QPushButton::pressed, this, &ConfigureProfileManager::SetUserImage);
+    connect(ui->pm_add, &QPushButton::clicked, this, &ConfigureProfileManager::AddUser);
+    connect(ui->pm_rename, &QPushButton::clicked, this, &ConfigureProfileManager::RenameUser);
+    connect(ui->pm_remove, &QPushButton::clicked, this, &ConfigureProfileManager::DeleteUser);
+    connect(ui->pm_set_image, &QPushButton::clicked, this, &ConfigureProfileManager::SetUserImage);
 
     scene = new QGraphicsScene;
     ui->current_user_icon->setScene(scene);
diff --git a/src/yuzu/configuration/configure_touchscreen_advanced.cpp b/src/yuzu/configuration/configure_touchscreen_advanced.cpp
index 8ced28c75..7d7cc00b7 100644
--- a/src/yuzu/configuration/configure_touchscreen_advanced.cpp
+++ b/src/yuzu/configuration/configure_touchscreen_advanced.cpp
@@ -11,7 +11,7 @@ ConfigureTouchscreenAdvanced::ConfigureTouchscreenAdvanced(QWidget* parent)
     : QDialog(parent), ui(std::make_unique<Ui::ConfigureTouchscreenAdvanced>()) {
     ui->setupUi(this);
 
-    connect(ui->restore_defaults_button, &QPushButton::pressed, this,
+    connect(ui->restore_defaults_button, &QPushButton::clicked, this,
             &ConfigureTouchscreenAdvanced::RestoreDefaults);
 
     LoadConfiguration();
diff --git a/src/yuzu/configuration/configure_web.cpp b/src/yuzu/configuration/configure_web.cpp
index 5a70ef168..336b062b3 100644
--- a/src/yuzu/configuration/configure_web.cpp
+++ b/src/yuzu/configuration/configure_web.cpp
@@ -9,7 +9,7 @@
 #include "core/telemetry_session.h"
 #include "ui_configure_web.h"
 #include "yuzu/configuration/configure_web.h"
-#include "yuzu/ui_settings.h"
+#include "yuzu/uisettings.h"
 
 ConfigureWeb::ConfigureWeb(QWidget* parent)
     : QWidget(parent), ui(std::make_unique<Ui::ConfigureWeb>()) {
diff --git a/src/yuzu/debugger/console.cpp b/src/yuzu/debugger/console.cpp
index 320898f6a..207ff4d58 100644
--- a/src/yuzu/debugger/console.cpp
+++ b/src/yuzu/debugger/console.cpp
@@ -10,7 +10,7 @@
 
 #include "common/logging/backend.h"
 #include "yuzu/debugger/console.h"
-#include "yuzu/ui_settings.h"
+#include "yuzu/uisettings.h"
 
 namespace Debugger {
 void ToggleConsole() {
diff --git a/src/yuzu/discord_impl.cpp b/src/yuzu/discord_impl.cpp
index 9d87a41eb..ea0079353 100644
--- a/src/yuzu/discord_impl.cpp
+++ b/src/yuzu/discord_impl.cpp
@@ -9,7 +9,7 @@
 #include "core/core.h"
 #include "core/loader/loader.h"
 #include "yuzu/discord_impl.h"
-#include "yuzu/ui_settings.h"
+#include "yuzu/uisettings.h"
 
 namespace DiscordRPC {
 
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index 1885587af..d18b96519 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -23,7 +23,7 @@
 #include "yuzu/game_list_p.h"
 #include "yuzu/game_list_worker.h"
 #include "yuzu/main.h"
-#include "yuzu/ui_settings.h"
+#include "yuzu/uisettings.h"
 
 GameListSearchField::KeyReleaseEater::KeyReleaseEater(GameList* gamelist) : gamelist{gamelist} {}
 
diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h
index 0b458ef48..ece534dd6 100644
--- a/src/yuzu/game_list_p.h
+++ b/src/yuzu/game_list_p.h
@@ -19,7 +19,7 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "common/string_util.h"
-#include "yuzu/ui_settings.h"
+#include "yuzu/uisettings.h"
 #include "yuzu/util/util.h"
 
 /**
diff --git a/src/yuzu/game_list_worker.cpp b/src/yuzu/game_list_worker.cpp
index 4f30e9147..77f358630 100644
--- a/src/yuzu/game_list_worker.cpp
+++ b/src/yuzu/game_list_worker.cpp
@@ -29,7 +29,7 @@
 #include "yuzu/game_list.h"
 #include "yuzu/game_list_p.h"
 #include "yuzu/game_list_worker.h"
-#include "yuzu/ui_settings.h"
+#include "yuzu/uisettings.h"
 
 namespace {
 
diff --git a/src/yuzu/hotkeys.cpp b/src/yuzu/hotkeys.cpp
index 4582e7f21..d4e97fa16 100644
--- a/src/yuzu/hotkeys.cpp
+++ b/src/yuzu/hotkeys.cpp
@@ -7,7 +7,7 @@
 #include <QTreeWidgetItem>
 #include <QtGlobal>
 #include "yuzu/hotkeys.h"
-#include "yuzu/ui_settings.h"
+#include "yuzu/uisettings.h"
 
 HotkeyRegistry::HotkeyRegistry() = default;
 HotkeyRegistry::~HotkeyRegistry() = default;
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index ae21f4753..ac57229d5 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -100,7 +100,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
 #include "yuzu/hotkeys.h"
 #include "yuzu/loading_screen.h"
 #include "yuzu/main.h"
-#include "yuzu/ui_settings.h"
+#include "yuzu/uisettings.h"
 
 #ifdef USE_DISCORD_PRESENCE
 #include "yuzu/discord_impl.h"
@@ -119,6 +119,7 @@ Q_IMPORT_PLUGIN(QWindowsIntegrationPlugin);
 #endif
 
 #ifdef _WIN32
+#include <windows.h>
 extern "C" {
 // tells Nvidia and AMD drivers to use the dedicated GPU by default on laptops with switchable
 // graphics
@@ -747,6 +748,18 @@ void GMainWindow::OnDisplayTitleBars(bool show) {
     }
 }
 
+void GMainWindow::PreventOSSleep() {
+#ifdef _WIN32
+    SetThreadExecutionState(ES_CONTINUOUS | ES_SYSTEM_REQUIRED | ES_DISPLAY_REQUIRED);
+#endif
+}
+
+void GMainWindow::AllowOSSleep() {
+#ifdef _WIN32
+    SetThreadExecutionState(ES_CONTINUOUS);
+#endif
+}
+
 QStringList GMainWindow::GetUnsupportedGLExtensions() {
     QStringList unsupported_ext;
 
@@ -966,6 +979,8 @@ void GMainWindow::BootGame(const QString& filename) {
 }
 
 void GMainWindow::ShutdownGame() {
+    AllowOSSleep();
+
     discord_rpc->Pause();
     emu_thread->RequestStop();
 
@@ -1567,6 +1582,8 @@ void GMainWindow::OnMenuRecentFile() {
 }
 
 void GMainWindow::OnStartGame() {
+    PreventOSSleep();
+
     emu_thread->SetRunning(true);
 
     qRegisterMetaType<Core::Frontend::SoftwareKeyboardParameters>(
@@ -1598,6 +1615,8 @@ void GMainWindow::OnPauseGame() {
     ui.action_Pause->setEnabled(false);
     ui.action_Stop->setEnabled(true);
     ui.action_Capture_Screenshot->setEnabled(false);
+
+    AllowOSSleep();
 }
 
 void GMainWindow::OnStopGame() {
@@ -1843,13 +1862,14 @@ void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string det
                           "data, or other bugs.");
     switch (result) {
     case Core::System::ResultStatus::ErrorSystemFiles: {
-        QString message = tr("yuzu was unable to locate a Switch system archive");
-        if (!details.empty()) {
-            message.append(tr(": %1. ").arg(QString::fromStdString(details)));
+        QString message;
+        if (details.empty()) {
+            message =
+                tr("yuzu was unable to locate a Switch system archive. %1").arg(common_message);
         } else {
-            message.append(tr(". "));
+            message = tr("yuzu was unable to locate a Switch system archive: %1. %2")
+                          .arg(QString::fromStdString(details), common_message);
         }
-        message.append(common_message);
 
         answer = QMessageBox::question(this, tr("System Archive Not Found"), message,
                                        QMessageBox::Yes | QMessageBox::No, QMessageBox::No);
@@ -1858,8 +1878,8 @@ void GMainWindow::OnCoreError(Core::System::ResultStatus result, std::string det
     }
 
     case Core::System::ResultStatus::ErrorSharedFont: {
-        QString message = tr("yuzu was unable to locate the Switch shared fonts. ");
-        message.append(common_message);
+        const QString message =
+            tr("yuzu was unable to locate the Switch shared fonts. %1").arg(common_message);
         answer = QMessageBox::question(this, tr("Shared Fonts Not Found"), message,
                                        QMessageBox::Yes | QMessageBox::No, QMessageBox::No);
         status_message = tr("Shared Font Missing");
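On the PreventOSSleep/AllowOSSleep pair added above: SetThreadExecutionState with ES_CONTINUOUS makes the requested flags persist until the same thread clears them, so ES_SYSTEM_REQUIRED | ES_DISPLAY_REQUIRED keeps the machine and screen awake while a game runs, and a bare ES_CONTINUOUS call restores normal power management. The patch pairs the calls manually at start, pause, and shutdown; a hypothetical RAII alternative (not what the patch does) would release the inhibition automatically:

```cpp
#ifdef _WIN32
#include <windows.h>

// Keeps system and display awake for the lifetime of the guard object.
// Note: SetThreadExecutionState is per-thread, so construct and destroy the
// guard on the same thread.
class ScopedSleepInhibitor {
public:
    ScopedSleepInhibitor() {
        SetThreadExecutionState(ES_CONTINUOUS | ES_SYSTEM_REQUIRED | ES_DISPLAY_REQUIRED);
    }
    ~ScopedSleepInhibitor() {
        SetThreadExecutionState(ES_CONTINUOUS); // clear the persistent flags
    }
    ScopedSleepInhibitor(const ScopedSleepInhibitor&) = delete;
    ScopedSleepInhibitor& operator=(const ScopedSleepInhibitor&) = delete;
};
#endif
```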
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 1137bbc7a..501608ddc 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -130,6 +130,9 @@ private:
     void ConnectWidgetEvents();
     void ConnectMenuEvents();
 
+    void PreventOSSleep();
+    void AllowOSSleep();
+
     QStringList GetUnsupportedGLExtensions();
     bool LoadROM(const QString& filename);
     void BootGame(const QString& filename);
diff --git a/src/yuzu/ui_settings.cpp b/src/yuzu/uisettings.cpp
index 4bdc302e0..7f7d247a3 100644
--- a/src/yuzu/ui_settings.cpp
+++ b/src/yuzu/uisettings.cpp
@@ -2,7 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "ui_settings.h"
+#include "yuzu/uisettings.h"
 
 namespace UISettings {
 
diff --git a/src/yuzu/ui_settings.h b/src/yuzu/uisettings.h
index a62cd6911..a62cd6911 100644
--- a/src/yuzu/ui_settings.h
+++ b/src/yuzu/uisettings.h
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 30b22341b..067d58d80 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -340,7 +340,6 @@ void Config::ReadValues() {
     }
 
     // Core
-    Settings::values.cpu_jit_enabled = sdl2_config->GetBoolean("Core", "cpu_jit_enabled", true);
    Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);
 
     // Renderer
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 4f1add434..0cfc111a6 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -76,10 +76,6 @@ motion_device=
 touch_device=
 
 [Core]
-# Whether to use the Just-In-Time (JIT) compiler for CPU emulation
-# 0: Interpreter (slow), 1 (default): JIT (fast)
-cpu_jit_enabled =
-
 # Whether to use multi-core for CPU emulation
 # 0 (default): Disabled, 1: Enabled
 use_multi_core=
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
index b96b7d279..9a11dc6c3 100644
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -114,7 +114,6 @@ void Config::ReadValues() {
     }
 
     // Core
-    Settings::values.cpu_jit_enabled = sdl2_config->GetBoolean("Core", "cpu_jit_enabled", true);
     Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);
 
     // Renderer
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h
index 0f880d8c7..9a3e86d68 100644
--- a/src/yuzu_tester/default_ini.h
+++ b/src/yuzu_tester/default_ini.h
@@ -8,10 +8,6 @@ namespace DefaultINI {
 
 const char* sdl2_config_file = R"(
 [Core]
-# Whether to use the Just-In-Time (JIT) compiler for CPU emulation
-# 0: Interpreter (slow), 1 (default): JIT (fast)
-cpu_jit_enabled =
-
 # Whether to use multi-core for CPU emulation
 # 0 (default): Disabled, 1: Enabled
 use_multi_core=
diff --git a/src/yuzu_tester/yuzu.cpp b/src/yuzu_tester/yuzu.cpp
index b589c3de3..0ee97aa54 100644
--- a/src/yuzu_tester/yuzu.cpp
+++ b/src/yuzu_tester/yuzu.cpp
@@ -92,7 +92,6 @@ int main(int argc, char** argv) {
 
     int option_index = 0;
 
-    char* endarg;
 #ifdef _WIN32
     int argc_w;
     auto argv_w = CommandLineToArgvW(GetCommandLineW(), &argc_w);
@@ -226,7 +225,7 @@ int main(int argc, char** argv) {
 
     switch (load_result) {
     case Core::System::ResultStatus::ErrorGetLoader:
-        LOG_CRITICAL(Frontend, "Failed to obtain loader for %s!", filepath.c_str());
+        LOG_CRITICAL(Frontend, "Failed to obtain loader for {}!", filepath);
         return -1;
     case Core::System::ResultStatus::ErrorLoader:
         LOG_CRITICAL(Frontend, "Failed to load ROM!");
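The log change above reflects that yuzu's LOG_* macros use fmt-style "{}" placeholders rather than printf conversions, so a "%s" is not interpreted, and fmt formats std::string directly, making the .c_str() call unnecessary. For illustration, the equivalent written against the fmt library directly (which the logging macros build on):

```cpp
#include <string>
#include <fmt/format.h>

int main() {
    const std::string filepath = "game.nsp";
    // "{}" is the placeholder; std::string needs no .c_str() conversion.
    fmt::print("Failed to obtain loader for {}!\n", filepath);
}
```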