summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.ci/scripts/.gitkeep0
-rw-r--r--.ci/scripts/common/post-upload.sh15
-rw-r--r--.ci/scripts/common/pre-upload.sh6
-rw-r--r--.ci/scripts/format/docker.sh6
-rw-r--r--.ci/scripts/format/exec.sh4
-rw-r--r--.ci/scripts/format/script.sh37
-rw-r--r--.ci/scripts/linux/docker.sh14
-rw-r--r--.ci/scripts/linux/exec.sh5
-rw-r--r--.ci/scripts/linux/upload.sh14
-rw-r--r--.ci/scripts/merge/apply-patches-by-label.py28
-rw-r--r--.ci/scripts/merge/check-label-presence.py18
-rw-r--r--.ci/scripts/merge/yuzubot-git-config.sh2
-rw-r--r--.ci/scripts/windows/docker.sh50
-rw-r--r--.ci/scripts/windows/exec.sh5
-rw-r--r--.ci/scripts/windows/scan_dll.py106
-rw-r--r--.ci/scripts/windows/upload.sh13
-rw-r--r--.ci/templates/build-single.yml21
-rw-r--r--.ci/templates/build-standard.yml22
-rw-r--r--.ci/templates/build-testing.yml30
-rw-r--r--.ci/templates/format-check.yml14
-rw-r--r--.ci/templates/merge.yml46
-rw-r--r--.ci/templates/mergebot.yml15
-rw-r--r--.ci/templates/release.yml29
-rw-r--r--.ci/templates/retrieve-artifact-source.yml16
-rw-r--r--.ci/templates/retrieve-master-source.yml11
-rw-r--r--.ci/templates/sync-source.yml7
-rw-r--r--.ci/yuzu-mainline.yml36
-rw-r--r--.ci/yuzu-verify.yml18
-rw-r--r--.ci/yuzu.yml19
-rw-r--r--src/core/file_sys/program_metadata.cpp4
-rw-r--r--src/core/file_sys/program_metadata.h4
-rw-r--r--src/core/hle/kernel/process.cpp16
-rw-r--r--src/core/hle/kernel/process.h34
-rw-r--r--src/core/hle/kernel/svc.cpp117
-rw-r--r--src/core/hle/kernel/svc_wrap.h5
-rw-r--r--src/core/hle/kernel/vm_manager.cpp290
-rw-r--r--src/core/hle/kernel/vm_manager.h48
-rw-r--r--src/video_core/CMakeLists.txt3
-rw-r--r--src/video_core/buffer_cache.h299
-rw-r--r--src/video_core/engines/maxwell_3d.h1
-rw-r--r--src/video_core/engines/shader_bytecode.h5
-rw-r--r--src/video_core/gpu.cpp2
-rw-r--r--src/video_core/macro_interpreter.cpp4
-rw-r--r--src/video_core/memory_manager.cpp24
-rw-r--r--src/video_core/memory_manager.h8
-rw-r--r--src/video_core/rasterizer_interface.h3
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp110
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h76
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_device.h10
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.cpp102
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.h82
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp154
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h13
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp107
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp10
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp3
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp4
-rw-r--r--src/video_core/renderer_opengl/utils.cpp48
-rw-r--r--src/video_core/renderer_opengl/utils.h41
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp14
-rw-r--r--src/video_core/shader/decode/image.cpp6
-rw-r--r--src/video_core/shader/decode/memory.cpp19
-rw-r--r--src/video_core/shader/decode/texture.cpp16
-rw-r--r--src/video_core/shader/shader_ir.cpp27
-rw-r--r--src/video_core/shader/shader_ir.h17
-rw-r--r--src/video_core/shader/track.cpp25
-rw-r--r--src/video_core/texture_cache/surface_base.cpp9
-rw-r--r--src/video_core/texture_cache/surface_base.h10
-rw-r--r--src/video_core/texture_cache/surface_params.cpp13
-rw-r--r--src/video_core/texture_cache/texture_cache.h37
74 files changed, 1856 insertions, 586 deletions
diff --git a/.ci/scripts/.gitkeep b/.ci/scripts/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
--- a/.ci/scripts/.gitkeep
+++ /dev/null
diff --git a/.ci/scripts/common/post-upload.sh b/.ci/scripts/common/post-upload.sh
new file mode 100644
index 000000000..bb4e9d328
--- /dev/null
+++ b/.ci/scripts/common/post-upload.sh
@@ -0,0 +1,15 @@
#!/bin/bash -ex

# Shared packaging tail, sourced by the per-platform upload.sh scripts.
# Reads REV_NAME, ARCHIVE_NAME and COMPRESSION_FLAGS (set by the sourcing
# script) and RELEASE_NAME, which must already be defined by the caller or
# the environment.
: "${RELEASE_NAME:?RELEASE_NAME must be set before sourcing post-upload.sh}"

# Copy documentation
cp license.txt "$REV_NAME"
cp README.md "$REV_NAME"

# COMPRESSION_FLAGS stays unquoted so word splitting applies to it as before.
tar $COMPRESSION_FLAGS "$ARCHIVE_NAME" "$REV_NAME"

# Quoted so that a name containing spaces or glob characters stays one operand.
mv "$REV_NAME" "$RELEASE_NAME"

7z a "$REV_NAME.7z" "$RELEASE_NAME"

# move the compiled archive into the artifacts directory to be uploaded by travis releases
mv "$ARCHIVE_NAME" artifacts/
mv "$REV_NAME.7z" artifacts/
diff --git a/.ci/scripts/common/pre-upload.sh b/.ci/scripts/common/pre-upload.sh
new file mode 100644
index 000000000..3c2fc79a2
--- /dev/null
+++ b/.ci/scripts/common/pre-upload.sh
@@ -0,0 +1,6 @@
#!/bin/bash -ex

# Commit date of HEAD with the dashes removed (YYYYMMDD) and its abbreviated
# hash; the upload scripts that source this file use both to name the build.
GITDATE="$(git show -s --date=short --format='%ad' | sed 's/-//g')"
GITREV="$(git show -s --format='%h')"

# Directory the packaged archives are moved into.
mkdir -p artifacts
diff --git a/.ci/scripts/format/docker.sh b/.ci/scripts/format/docker.sh
new file mode 100644
index 000000000..778411e4a
--- /dev/null
+++ b/.ci/scripts/format/docker.sh
@@ -0,0 +1,6 @@
#!/bin/bash -ex

# Run clang-format: make the check script executable and launch it from the
# /yuzu directory.
FORMAT_SCRIPT=./.ci/scripts/format/script.sh

cd /yuzu
chmod a+x "$FORMAT_SCRIPT"
"$FORMAT_SCRIPT"
diff --git a/.ci/scripts/format/exec.sh b/.ci/scripts/format/exec.sh
new file mode 100644
index 000000000..5d6393b38
--- /dev/null
+++ b/.ci/scripts/format/exec.sh
@@ -0,0 +1,4 @@
#!/bin/bash -ex

# Run the formatting check in the linux-clang-format container, with the
# current directory mounted at /yuzu.
chmod a+x ./.ci/scripts/format/docker.sh
# The mount source is quoted so a working directory containing spaces is
# passed to docker as a single argument.
docker run -v "$(pwd):/yuzu" yuzuemu/build-environments:linux-clang-format /bin/bash -ex /yuzu/.ci/scripts/format/docker.sh
diff --git a/.ci/scripts/format/script.sh b/.ci/scripts/format/script.sh
new file mode 100644
index 000000000..5ab828d5e
--- /dev/null
+++ b/.ci/scripts/format/script.sh
@@ -0,0 +1,37 @@
#!/bin/bash -ex

# Abort when any of the listed sources or text/config files has a line that
# ends in whitespace.
if grep -nrI '\s$' src *.yml *.txt *.md Doxyfile .gitignore .gitmodules .ci* dist/*.desktop \
                 dist/*.svg dist/*.xml; then
    echo Trailing whitespace found, aborting
    exit 1
fi

# Default clang-format points to default 3.5 version one
CLANG_FORMAT=clang-format-6.0
$CLANG_FORMAT --version

if [ "$TRAVIS_EVENT_TYPE" = "pull_request" ]; then
    # Get list of every file modified in this pull request
    files_to_lint="$(git diff --name-only --diff-filter=ACMRTUXB $TRAVIS_COMMIT_RANGE | grep '^src/[^.]*[.]\(cpp\|h\)$' || true)"
else
    # Check everything for branch pushes
    files_to_lint="$(find src/ -name '*.cpp' -or -name '*.h')"
fi

# Turn off tracing for this because it's too verbose
set +x

# Start from a known value so a `fail` inherited from the environment cannot
# force a failure.
fail=0

# Read one path per line instead of word-splitting the list, so a path that
# contains spaces is linted as a single file.
while IFS= read -r f; do
    # The here-string yields one empty line when there is nothing to lint.
    [ -n "$f" ] || continue
    d=$(diff -u "$f" <($CLANG_FORMAT "$f") || true)
    if [ -n "$d" ]; then
        echo "!!! $f not compliant to coding style, here is the fix:"
        echo "$d"
        fail=1
    fi
done <<< "$files_to_lint"

set -x

if [ "$fail" = 1 ]; then
    exit 1
fi
diff --git a/.ci/scripts/linux/docker.sh b/.ci/scripts/linux/docker.sh
new file mode 100644
index 000000000..f538a4081
--- /dev/null
+++ b/.ci/scripts/linux/docker.sh
@@ -0,0 +1,14 @@
#!/bin/bash -ex

cd /yuzu

# ccache statistics before the build
ccache -s

# -p accepts an already existing build directory without printing an error;
# cd is a separate command so that a failure of either step stops the script.
mkdir -p build
cd build
cmake .. -G Ninja -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON

ninja

# ccache statistics after the build
ccache -s

ctest -VV -C Release
diff --git a/.ci/scripts/linux/exec.sh b/.ci/scripts/linux/exec.sh
new file mode 100644
index 000000000..a5a6c34b9
--- /dev/null
+++ b/.ci/scripts/linux/exec.sh
@@ -0,0 +1,5 @@
#!/bin/bash -ex

# The ccache directory lives in the working directory, which is mounted at
# /yuzu, so the container sees it as /yuzu/ccache (CCACHE_DIR below).
mkdir -p "ccache" || true
chmod a+x ./.ci/scripts/linux/docker.sh
# The mount source is quoted so a working directory containing spaces is
# passed to docker as a single argument.
docker run -e ENABLE_COMPATIBILITY_REPORTING -e CCACHE_DIR=/yuzu/ccache -v "$(pwd):/yuzu" yuzuemu/build-environments:linux-fresh /bin/bash /yuzu/.ci/scripts/linux/docker.sh
diff --git a/.ci/scripts/linux/upload.sh b/.ci/scripts/linux/upload.sh
new file mode 100644
index 000000000..0d131d1dd
--- /dev/null
+++ b/.ci/scripts/linux/upload.sh
@@ -0,0 +1,14 @@
#!/bin/bash -ex

# Sets GITDATE and GITREV and creates the artifacts directory.
. .ci/scripts/common/pre-upload.sh

REV_NAME="yuzu-linux-$GITDATE-$GITREV"
ARCHIVE_NAME="$REV_NAME.tar.xz"
COMPRESSION_FLAGS="-cJvf"

# Staging directory for the two binaries.
mkdir "$REV_NAME"

cp build/bin/yuzu-cmd "$REV_NAME"
cp build/bin/yuzu "$REV_NAME"

# Archives the staging directory and moves the archives into artifacts/.
. .ci/scripts/common/post-upload.sh
diff --git a/.ci/scripts/merge/apply-patches-by-label.py b/.ci/scripts/merge/apply-patches-by-label.py
new file mode 100644
index 000000000..b346001a5
--- /dev/null
+++ b/.ci/scripts/merge/apply-patches-by-label.py
@@ -0,0 +1,28 @@
1# Download all pull requests as patches that match a specific label
2# Usage: python apply-patches-by-label.py <Label to Match> <Root Path Folder to DL to>
3
4import requests, sys, json, urllib3.request, shutil, subprocess
5
6http = urllib3.PoolManager()
7dl_list = {}
8
def check_individual(labels):
    """Return True when any label dict in `labels` is named sys.argv[1]."""
    wanted = sys.argv[1]
    return any(label["name"] == wanted for label in labels)
14
# Fetch the pull request list, then fetch, squash-merge and commit every pull
# request carrying the label given as the first command line argument.
try:
    url = 'https://api.github.com/repos/yuzu-emu/yuzu/pulls'
    response = requests.get(url)
    if response.ok:
        for pr in json.loads(response.content):
            if not check_individual(pr["labels"]):
                continue
            pn = pr["number"]
            print("Matched PR# %s" % pn)
            print(subprocess.check_output(["git", "fetch", "https://github.com/yuzu-emu/yuzu.git", "pull/%s/head:pr-%s" % (pn, pn), "-f"]))
            print(subprocess.check_output(["git", "merge", "--squash", "pr-%s" % pn]))
            # No shell is involved, so the message is passed as its own argv
            # element; quotes written into it would end up in the commit message.
            print(subprocess.check_output(["git", "commit", "-m", "Merge PR %s" % pn]))
    else:
        # Nothing is merged in this case; say so instead of staying silent.
        sys.stderr.write("Pull request query failed with HTTP status %s\n" % response.status_code)
except Exception:
    # Show what went wrong before reporting failure through the exit status.
    import traceback
    traceback.print_exc()
    sys.exit(-1)
diff --git a/.ci/scripts/merge/check-label-presence.py b/.ci/scripts/merge/check-label-presence.py
new file mode 100644
index 000000000..048466d7e
--- /dev/null
+++ b/.ci/scripts/merge/check-label-presence.py
@@ -0,0 +1,18 @@
1# Checks to see if the specified pull request # has the specified tag
2# Usage: python check-label-presence.py <Pull Request ID> <Name of Label>
3
4import requests, json, sys
5
# Look up the labels of the pull request given as the first argument and
# publish `enabletesting` as true when one of them is named like the second
# argument, false otherwise. Any error ends the script with status -1 and no
# variable is published.
enabled = False
try:
    url = 'https://api.github.com/repos/yuzu-emu/yuzu/issues/%s' % sys.argv[1]
    response = requests.get(url)
    if response.ok:
        j = json.loads(response.content)
        enabled = any(label["name"] == sys.argv[2] for label in j["labels"])
# `except Exception` rather than a bare `except`: a bare clause also catches
# SystemExit, which turned the successful sys.exit() of the match case into
# exit status -1.
except Exception:
    sys.exit(-1)

print('##vso[task.setvariable variable=enabletesting;]%s' % ('true' if enabled else 'false'))
diff --git a/.ci/scripts/merge/yuzubot-git-config.sh b/.ci/scripts/merge/yuzubot-git-config.sh
new file mode 100644
index 000000000..d9d595bbc
--- /dev/null
+++ b/.ci/scripts/merge/yuzubot-git-config.sh
@@ -0,0 +1,2 @@
#!/bin/bash -ex

# Global git identity for the yuzubot user.
git config --global user.email "yuzu@yuzu-emu.org"
git config --global user.name "yuzubot"
diff --git a/.ci/scripts/windows/docker.sh b/.ci/scripts/windows/docker.sh
new file mode 100644
index 000000000..f7093363b
--- /dev/null
+++ b/.ci/scripts/windows/docker.sh
@@ -0,0 +1,50 @@
#!/bin/bash -ex

cd /yuzu

ccache -s

# Dirty hack to trick unicorn makefile into believing we are in a MINGW system
mv /bin/uname /bin/uname1 && echo -e '#!/bin/sh\necho MINGW64' >> /bin/uname
chmod +x /bin/uname

# Dirty hack to trick unicorn makefile into believing we have cmd
echo '' >> /bin/cmd
chmod +x /bin/cmd

# -p accepts an already existing build directory without printing an error;
# cd is a separate command so that a failure of either step stops the script.
mkdir -p build
cd build
cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
ninja

# Clean up the dirty hacks
rm /bin/uname && mv /bin/uname1 /bin/uname
rm /bin/cmd

ccache -s

echo "Tests skipped"
#ctest -VV -C Release

echo 'Prepare binaries...'
cd ..
mkdir package

QT_PLATFORM_DLL_PATH='/usr/x86_64-w64-mingw32/lib/qt5/plugins/platforms/'
find build/ -name "yuzu*.exe" -exec cp {} 'package' \;

# copy Qt plugins
mkdir package/platforms
cp "${QT_PLATFORM_DLL_PATH}/qwindows.dll" package/platforms/
cp -rv "${QT_PLATFORM_DLL_PATH}/../mediaservice/" package/
cp -rv "${QT_PLATFORM_DLL_PATH}/../imageformats/" package/
rm -f package/mediaservice/*d.dll

for i in package/*.exe; do
    # we need to process pdb here, however, cv2pdb
    # does not work here, so we just simply strip all the debug symbols
    x86_64-w64-mingw32-strip "${i}"
done

# Copy the DLLs imported by the executables and image format plugins into
# the package directory.
pip3 install pefile
python3 .ci/scripts/windows/scan_dll.py package/*.exe "package/"
python3 .ci/scripts/windows/scan_dll.py package/imageformats/*.dll "package/"
diff --git a/.ci/scripts/windows/exec.sh b/.ci/scripts/windows/exec.sh
new file mode 100644
index 000000000..d6a994856
--- /dev/null
+++ b/.ci/scripts/windows/exec.sh
@@ -0,0 +1,5 @@
#!/bin/bash -ex

# The ccache directory lives in the working directory, which is mounted at
# /yuzu, so the container sees it as /yuzu/ccache (CCACHE_DIR below).
mkdir -p "ccache" || true
chmod a+x ./.ci/scripts/windows/docker.sh
# The mount source is quoted so a working directory containing spaces is
# passed to docker as a single argument.
docker run -e CCACHE_DIR=/yuzu/ccache -v "$(pwd):/yuzu" yuzuemu/build-environments:linux-mingw /bin/bash -ex /yuzu/.ci/scripts/windows/docker.sh
diff --git a/.ci/scripts/windows/scan_dll.py b/.ci/scripts/windows/scan_dll.py
new file mode 100644
index 000000000..163183f2e
--- /dev/null
+++ b/.ci/scripts/windows/scan_dll.py
@@ -0,0 +1,106 @@
1import pefile
2import sys
3import re
4import os
5import queue
6import shutil
7
8# constant definitions
9KNOWN_SYS_DLLS = ['WINMM.DLL', 'MSVCRT.DLL', 'VERSION.DLL', 'MPR.DLL',
10 'DWMAPI.DLL', 'UXTHEME.DLL', 'DNSAPI.DLL', 'IPHLPAPI.DLL']
11# below is for Ubuntu 18.04 with specified PPA enabled, if you are using
12# other distro or different repositories, change the following accordingly
13DLL_PATH = [
14 '/usr/x86_64-w64-mingw32/bin/',
15 '/usr/x86_64-w64-mingw32/lib/',
16 '/usr/lib/gcc/x86_64-w64-mingw32/7.3-posix/'
17]
18
19missing = []
20
21
def parse_imports(file_name):
    """Return the names of the non-system DLLs imported by a PE file.

    file_name: path of the executable or DLL to inspect.

    A name is left out when its upper-cased form is listed in KNOWN_SYS_DLLS
    or contains "32.DLL". The returned names keep the casing found in the
    file's import table.
    """
    pe = pefile.PE(file_name, fast_load=True)
    pe.parse_data_directories()

    results = []
    # pefile only sets DIRECTORY_ENTRY_IMPORT when the file has an import
    # directory; treat a missing attribute as "no imports" instead of raising.
    for entry in getattr(pe, 'DIRECTORY_ENTRY_IMPORT', []):
        current = entry.dll.decode()
        current_u = current.upper()  # b/c Windows is often case insensitive
        # here we filter out system dlls
        if current_u in KNOWN_SYS_DLLS:
            continue
        # dll w/ names like *32.dll are likely to be system dlls
        if re.match(string=current_u, pattern=r'.*32\.DLL'):
            continue
        results.append(current)

    return results
36
37
def parse_imports_recursive(file_name, path_list=None):
    """Collect the dependencies of one or more PE files, following imports.

    file_name: a path, or a list of paths, to start from.
    path_list: optional list; the full path of every dependency that
        find_dll() resolves is appended to it. A fresh list is used when it
        is omitted, so separate calls never share state.

    Returns the list of resolved dependency names in discovery order.
    Names that find_dll() cannot resolve are added once to the module-level
    `missing` list.
    """
    if path_list is None:
        path_list = []
    q = queue.Queue()  # create a FIFO queue
    # file_name can be a string or a list for convenience
    if isinstance(file_name, str):
        q.put(file_name)
    elif isinstance(file_name, list):
        for i in file_name:
            q.put(i)
    full_list = []
    while q.qsize():
        current = q.get_nowait()
        print('> %s' % current)
        deps = parse_imports(current)
        # if this dll does not have any import, ignore it
        if not deps:
            continue
        for dep in deps:
            # the dependency already included in the list, skip
            if dep in full_list:
                continue
            # find the requested dll in the provided paths
            full_path = find_dll(dep)
            if not full_path:
                # several files can import the same unresolved DLL; report it once
                if dep not in missing:
                    missing.append(dep)
                continue
            full_list.append(dep)
            # scan the resolved DLL for its own imports later on
            q.put(full_path)
            path_list.append(full_path)
    return full_list
67
68
def find_dll(name):
    """Return the full path of the first file named `name` under DLL_PATH.

    Each directory in DLL_PATH is walked recursively in order and file names
    are compared case-insensitively. Returns None when nothing matches.
    """
    wanted = name.lower()
    for search_root in DLL_PATH:
        for folder, _, entries in os.walk(search_root):
            hit = next((entry for entry in entries if entry.lower() == wanted), None)
            if hit is not None:
                return os.path.join(folder, hit)
    return None
75
76
def deploy(name, dst, dry_run=False):
    """Copy every dependency resolved for `name` into `dst`.

    name: a path or list of paths handed to parse_imports_recursive().
    dst: destination passed to shutil.copy().
    dry_run: when true, print the copies instead of performing them.

    Returns the list of full paths of the resolved dependencies.
    """
    resolved = []
    parse_imports_recursive(name, resolved)
    for source in resolved:
        if dry_run:
            print('[Dry-Run] Copy %s to %s' % (source, dst))
        else:
            shutil.copy(source, dst)
    print('Deploy completed.')
    return resolved
87
88
def main():
    """Deploy the dependencies of the files named on the command line.

    Every argument but the last is a file to examine; the last one is the
    directory the dependencies are copied into.

    Returns 1 when fewer than two arguments are given or the target is not a
    directory, otherwise 0 (also when some DLLs could not be found; those are
    only listed).
    """
    if len(sys.argv) < 3:
        # fill the %s placeholder with the script name
        print('Usage: %s [files to examine ...] [target deploy directory]' % sys.argv[0])
        return 1
    to_deploy = sys.argv[1:-1]
    tgt_dir = sys.argv[-1]
    if not os.path.isdir(tgt_dir):
        print('%s is not a directory.' % tgt_dir)
        return 1
    print('Scanning dependencies...')
    deploy(to_deploy, tgt_dir)
    if missing:
        print('Following DLLs are not found: %s' % ('\n'.join(missing)))
    return 0
103
104
if __name__ == '__main__':
    # Hand main()'s return value to the caller as the exit status, so the
    # error returns are visible to the invoking script.
    sys.exit(main())
diff --git a/.ci/scripts/windows/upload.sh b/.ci/scripts/windows/upload.sh
new file mode 100644
index 000000000..de73d3541
--- /dev/null
+++ b/.ci/scripts/windows/upload.sh
@@ -0,0 +1,13 @@
#!/bin/bash -ex

# Sets GITDATE and GITREV and creates the artifacts directory.
. .ci/scripts/common/pre-upload.sh

REV_NAME="yuzu-windows-mingw-$GITDATE-$GITREV"
ARCHIVE_NAME="$REV_NAME.tar.gz"
COMPRESSION_FLAGS="-czvf"

mkdir "$REV_NAME"
# get around the permission issues
cp -r package/* "$REV_NAME"

# Archives the staging directory and moves the archives into artifacts/.
. .ci/scripts/common/post-upload.sh
diff --git a/.ci/templates/build-single.yml b/.ci/templates/build-single.yml
new file mode 100644
index 000000000..77eeb96b5
--- /dev/null
+++ b/.ci/templates/build-single.yml
@@ -0,0 +1,21 @@
1parameters:
2 artifactSource: 'true'
3
4steps:
5- task: DockerInstaller@0
6 displayName: 'Prepare Environment'
7 inputs:
8 dockerVersion: '17.09.0-ce'
9- task: CacheBeta@0
10 displayName: 'Cache Build System'
11 inputs:
12 key: yuzu-v1-$(BuildName)-$(BuildSuffix)-$(CacheSuffix)
13 path: $(System.DefaultWorkingDirectory)/ccache
14 cacheHitVar: CACHE_RESTORED
15- script: chmod a+x ./.ci/scripts/$(ScriptFolder)/exec.sh && ./.ci/scripts/$(ScriptFolder)/exec.sh
16 displayName: 'Build'
17- script: chmod a+x ./.ci/scripts/$(ScriptFolder)/upload.sh && ./.ci/scripts/$(ScriptFolder)/upload.sh
18 displayName: 'Package Artifacts'
19- publish: artifacts
20 artifact: 'yuzu-$(BuildName)-$(BuildSuffix)'
21 displayName: 'Upload Artifacts'
diff --git a/.ci/templates/build-standard.yml b/.ci/templates/build-standard.yml
new file mode 100644
index 000000000..9975f5c49
--- /dev/null
+++ b/.ci/templates/build-standard.yml
@@ -0,0 +1,22 @@
1jobs:
2- job: build
3 displayName: 'standard'
4 pool:
5 vmImage: ubuntu-latest
6 strategy:
7 maxParallel: 10
8 matrix:
9 windows:
10 BuildSuffix: 'windows-mingw'
11 ScriptFolder: 'windows'
12 linux:
13 BuildSuffix: 'linux'
14 ScriptFolder: 'linux'
15 steps:
16 - template: ./sync-source.yml
17 parameters:
18 artifactSource: $(parameters.artifactSource)
19 needSubmodules: 'true'
20 - template: ./build-single.yml
21 parameters:
22 artifactSource: 'false' \ No newline at end of file
diff --git a/.ci/templates/build-testing.yml b/.ci/templates/build-testing.yml
new file mode 100644
index 000000000..101e52996
--- /dev/null
+++ b/.ci/templates/build-testing.yml
@@ -0,0 +1,30 @@
1jobs:
2- job: build_test
3 displayName: 'testing'
4 pool:
5 vmImage: ubuntu-latest
6 strategy:
7 maxParallel: 10
8 matrix:
9 windows:
10 BuildSuffix: 'windows-testing'
11 ScriptFolder: 'windows'
12 steps:
13 - task: PythonScript@0
14 condition: eq(variables['Build.Reason'], 'PullRequest')
15 displayName: 'Determine Testing Status'
16 inputs:
17 scriptSource: 'filePath'
18 scriptPath: '../scripts/merge/check-label-presence.py'
19 arguments: '$(System.PullRequest.PullRequestNumber) create-testing-build'
20 - ${{ if eq(variables.enabletesting, 'true') }}:
21 - template: ./sync-source.yml
22 parameters:
23 artifactSource: $(parameters.artifactSource)
24 needSubmodules: 'true'
25 - template: ./mergebot.yml
26 parameters:
27 matchLabel: 'testing-merge'
28 - template: ./build-single.yml
29 parameters:
30 artifactSource: 'false' \ No newline at end of file
diff --git a/.ci/templates/format-check.yml b/.ci/templates/format-check.yml
new file mode 100644
index 000000000..5061f1cb8
--- /dev/null
+++ b/.ci/templates/format-check.yml
@@ -0,0 +1,14 @@
1parameters:
2 artifactSource: 'true'
3
4steps:
5- template: ./sync-source.yml
6 parameters:
7 artifactSource: $(parameters.artifactSource)
8 needSubmodules: 'false'
9- task: DockerInstaller@0
10 displayName: 'Prepare Environment'
11 inputs:
12 dockerVersion: '17.09.0-ce'
13- script: chmod a+x ./.ci/scripts/format/exec.sh && ./.ci/scripts/format/exec.sh
14 displayName: 'Verify Formatting'
diff --git a/.ci/templates/merge.yml b/.ci/templates/merge.yml
new file mode 100644
index 000000000..efc82778a
--- /dev/null
+++ b/.ci/templates/merge.yml
@@ -0,0 +1,46 @@
1jobs:
2- job: merge
3 displayName: 'pull requests'
4 steps:
5 - checkout: self
6 submodules: recursive
7 - template: ./mergebot.yml
8 parameters:
9 matchLabel: '$(BuildName)-merge'
10 - task: ArchiveFiles@2
11 displayName: 'Package Source'
12 inputs:
13 rootFolderOrFile: '$(System.DefaultWorkingDirectory)'
14 includeRootFolder: false
15 archiveType: '7z'
16 archiveFile: '$(Build.ArtifactStagingDirectory)/yuzu-$(BuildName)-source.7z'
17 - task: PublishPipelineArtifact@1
18 displayName: 'Upload Artifacts'
19 inputs:
20 targetPath: '$(Build.ArtifactStagingDirectory)/yuzu-$(BuildName)-source.7z'
21 artifact: 'yuzu-$(BuildName)-source'
22 replaceExistingArchive: true
23- job: upload_source
24 displayName: 'upload'
25 dependsOn: merge
26 steps:
27 - template: ./sync-source.yml
28 parameters:
29 artifactSource: 'true'
30 needSubmodules: 'true'
31 - script: chmod a+x $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh && $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh
32 displayName: 'Apply Git Configuration'
33 - script: git tag -a $(BuildName)-$(Build.BuildId) -m "yuzu $(BuildName) $(Build.BuildNumber) $(Build.DefinitionName)"
34 displayName: 'Tag Source'
35 - script: git remote add other $(GitRepoPushChangesURL)
36 displayName: 'Register Repository'
37 - script: git push --follow-tags --force other HEAD:$(GitPushBranch)
38 displayName: 'Update Code'
39 - script: git rev-list -n 1 $(BuildName)-$(Build.BuildId) > $(Build.ArtifactStagingDirectory)/tag-commit.sha
40 displayName: 'Calculate Release Point'
41 - task: PublishPipelineArtifact@1
42 displayName: 'Upload Release Point'
43 inputs:
44 targetPath: '$(Build.ArtifactStagingDirectory)/tag-commit.sha'
45 artifact: 'yuzu-$(BuildName)-release-point'
46 replaceExistingArchive: true \ No newline at end of file
diff --git a/.ci/templates/mergebot.yml b/.ci/templates/mergebot.yml
new file mode 100644
index 000000000..5211efcc6
--- /dev/null
+++ b/.ci/templates/mergebot.yml
@@ -0,0 +1,15 @@
1parameters:
2 matchLabel: 'dummy-merge'
3
4steps:
5 - script: mkdir $(System.DefaultWorkingDirectory)/patches && pip install requests urllib3
6 displayName: 'Prepare Environment'
7 - script: chmod a+x $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh && $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh
8 displayName: 'Apply Git Configuration'
9 - task: PythonScript@0
10 displayName: 'Discover, Download, and Apply Patches'
11 inputs:
12 scriptSource: 'filePath'
13 scriptPath: '.ci/scripts/merge/apply-patches-by-label.py'
14 arguments: '${{ parameters.matchLabel }} patches'
15 workingDirectory: '$(System.DefaultWorkingDirectory)'
diff --git a/.ci/templates/release.yml b/.ci/templates/release.yml
new file mode 100644
index 000000000..60bebd2aa
--- /dev/null
+++ b/.ci/templates/release.yml
@@ -0,0 +1,29 @@
1steps:
2 - task: DownloadPipelineArtifact@2
3 displayName: 'Download Windows Release'
4 inputs:
5 artifactName: 'yuzu-$(BuildName)-windows-mingw'
6 buildType: 'current'
7 targetPath: '$(Build.ArtifactStagingDirectory)'
8 - task: DownloadPipelineArtifact@2
9 displayName: 'Download Linux Release'
10 inputs:
11 artifactName: 'yuzu-$(BuildName)-linux'
12 buildType: 'current'
13 targetPath: '$(Build.ArtifactStagingDirectory)'
14 - task: DownloadPipelineArtifact@2
15 displayName: 'Download Release Point'
16 inputs:
17 artifactName: 'yuzu-$(BuildName)-release-point'
18 buildType: 'current'
19 targetPath: '$(Build.ArtifactStagingDirectory)'
20 - script: echo '##vso[task.setvariable variable=tagcommit]' && cat $(Build.ArtifactStagingDirectory)/tag-commit.sha
21 displayName: 'Calculate Release Point'
22 - task: GitHubRelease@0
23 inputs:
24 gitHubConnection: $(GitHubReleaseConnectionName)
25 repositoryName: '$(GitHubReleaseRepoName)'
26 action: 'create'
27 target: $(variables.tagcommit)
28 title: 'yuzu $(BuildName) #$(Build.BuildId)'
29 assets: '$(Build.ArtifactStagingDirectory)/*'
diff --git a/.ci/templates/retrieve-artifact-source.yml b/.ci/templates/retrieve-artifact-source.yml
new file mode 100644
index 000000000..47d217e7b
--- /dev/null
+++ b/.ci/templates/retrieve-artifact-source.yml
@@ -0,0 +1,16 @@
1steps:
2- checkout: none
3- task: DownloadPipelineArtifact@2
4 displayName: 'Download Source'
5 inputs:
6 artifactName: 'yuzu-$(BuildName)-source'
7 buildType: 'current'
8 targetPath: '$(Build.ArtifactStagingDirectory)'
9- script: rm -rf $(System.DefaultWorkingDirectory) && mkdir $(System.DefaultWorkingDirectory)
10 displayName: 'Clean Working Directory'
11- task: ExtractFiles@1
12 displayName: 'Prepare Source'
13 inputs:
14 archiveFilePatterns: '$(Build.ArtifactStagingDirectory)/*.7z'
15 destinationFolder: '$(System.DefaultWorkingDirectory)'
16 cleanDestinationFolder: false \ No newline at end of file
diff --git a/.ci/templates/retrieve-master-source.yml b/.ci/templates/retrieve-master-source.yml
new file mode 100644
index 000000000..a08a3f926
--- /dev/null
+++ b/.ci/templates/retrieve-master-source.yml
@@ -0,0 +1,11 @@
1parameters:
2 needSubmodules: 'true'
3
4steps:
5- checkout: self
6 displayName: 'Checkout Recursive'
7 submodules: recursive
8# condition: eq(parameters.needSubmodules, 'true')
9#- checkout: self
10# displayName: 'Checkout Fast'
11# condition: ne(parameters.needSubmodules, 'true')
diff --git a/.ci/templates/sync-source.yml b/.ci/templates/sync-source.yml
new file mode 100644
index 000000000..409e1cd83
--- /dev/null
+++ b/.ci/templates/sync-source.yml
@@ -0,0 +1,7 @@
1steps:
2- ${{ if eq(parameters.artifactSource, 'true') }}:
3 - template: ./retrieve-artifact-source.yml
4- ${{ if ne(parameters.artifactSource, 'true') }}:
5 - template: ./retrieve-master-source.yml
6 parameters:
7 needSubmodules: $(parameters.needSubmodules) \ No newline at end of file
diff --git a/.ci/yuzu-mainline.yml b/.ci/yuzu-mainline.yml
index aa912913d..164bcb165 100644
--- a/.ci/yuzu-mainline.yml
+++ b/.ci/yuzu-mainline.yml
@@ -1,19 +1,23 @@
1# Starter pipeline
2# Start with a minimal pipeline that you can customize to build and deploy your code.
3# Add steps that build, run tests, deploy, and more:
4# https://aka.ms/yaml
5
6trigger: 1trigger:
7- master 2- master
8 3
9pool: 4stages:
10 vmImage: 'ubuntu-latest' 5- stage: merge
11 6 displayName: 'merge'
12steps: 7 jobs:
13- script: echo Hello, world! 8 - template: ./templates/merge.yml
14 displayName: 'Run a one-line script' 9- stage: format
15 10 dependsOn: merge
16- script: | 11 displayName: 'format'
17 echo Add other tasks to build, test, and deploy your project. 12 jobs:
18 echo See https://aka.ms/yaml 13 - job: format
19 displayName: 'Run a multi-line script' 14 displayName: 'clang'
15 pool:
16 vmImage: ubuntu-latest
17 steps:
18 - template: ./templates/format-check.yml
19- stage: build
20 displayName: 'build'
21 dependsOn: format
22 jobs:
23 - template: ./templates/build-standard.yml
diff --git a/.ci/yuzu-verify.yml b/.ci/yuzu-verify.yml
new file mode 100644
index 000000000..d01c1feed
--- /dev/null
+++ b/.ci/yuzu-verify.yml
@@ -0,0 +1,18 @@
1stages:
2- stage: format
3 displayName: 'format'
4 jobs:
5 - job: format
6 displayName: 'clang'
7 pool:
8 vmImage: ubuntu-latest
9 steps:
10 - template: ./templates/format-check.yml
11 parameters:
12 artifactSource: 'false'
13- stage: build
14 displayName: 'build'
15 dependsOn: format
16 jobs:
17 - template: ./templates/build-standard.yml
18 - template: ./templates/build-testing.yml \ No newline at end of file
diff --git a/.ci/yuzu.yml b/.ci/yuzu.yml
deleted file mode 100644
index aa912913d..000000000
--- a/.ci/yuzu.yml
+++ /dev/null
@@ -1,19 +0,0 @@
1# Starter pipeline
2# Start with a minimal pipeline that you can customize to build and deploy your code.
3# Add steps that build, run tests, deploy, and more:
4# https://aka.ms/yaml
5
6trigger:
7- master
8
9pool:
10 vmImage: 'ubuntu-latest'
11
12steps:
13- script: echo Hello, world!
14 displayName: 'Run a one-line script'
15
16- script: |
17 echo Add other tasks to build, test, and deploy your project.
18 echo See https://aka.ms/yaml
19 displayName: 'Run a multi-line script'
diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp
index eb76174c5..7310b3602 100644
--- a/src/core/file_sys/program_metadata.cpp
+++ b/src/core/file_sys/program_metadata.cpp
@@ -94,6 +94,10 @@ u64 ProgramMetadata::GetFilesystemPermissions() const {
94 return aci_file_access.permissions; 94 return aci_file_access.permissions;
95} 95}
96 96
97u32 ProgramMetadata::GetSystemResourceSize() const {
98 return npdm_header.system_resource_size;
99}
100
97const ProgramMetadata::KernelCapabilityDescriptors& ProgramMetadata::GetKernelCapabilities() const { 101const ProgramMetadata::KernelCapabilityDescriptors& ProgramMetadata::GetKernelCapabilities() const {
98 return aci_kernel_capabilities; 102 return aci_kernel_capabilities;
99} 103}
diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h
index 43bf2820a..88ec97d85 100644
--- a/src/core/file_sys/program_metadata.h
+++ b/src/core/file_sys/program_metadata.h
@@ -58,6 +58,7 @@ public:
58 u32 GetMainThreadStackSize() const; 58 u32 GetMainThreadStackSize() const;
59 u64 GetTitleID() const; 59 u64 GetTitleID() const;
60 u64 GetFilesystemPermissions() const; 60 u64 GetFilesystemPermissions() const;
61 u32 GetSystemResourceSize() const;
61 const KernelCapabilityDescriptors& GetKernelCapabilities() const; 62 const KernelCapabilityDescriptors& GetKernelCapabilities() const;
62 63
63 void Print() const; 64 void Print() const;
@@ -76,7 +77,8 @@ private:
76 u8 reserved_3; 77 u8 reserved_3;
77 u8 main_thread_priority; 78 u8 main_thread_priority;
78 u8 main_thread_cpu; 79 u8 main_thread_cpu;
79 std::array<u8, 8> reserved_4; 80 std::array<u8, 4> reserved_4;
81 u32_le system_resource_size;
80 u32_le process_category; 82 u32_le process_category;
81 u32_le main_stack_size; 83 u32_le main_stack_size;
82 std::array<u8, 0x10> application_name; 84 std::array<u8, 0x10> application_name;
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index f45ef05f6..db3ab14ce 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -129,20 +129,17 @@ u64 Process::GetTotalPhysicalMemoryAvailable() const {
129 return vm_manager.GetTotalPhysicalMemoryAvailable(); 129 return vm_manager.GetTotalPhysicalMemoryAvailable();
130} 130}
131 131
132u64 Process::GetTotalPhysicalMemoryAvailableWithoutMmHeap() const { 132u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const {
133 // TODO: Subtract the personal heap size from this when the 133 return GetTotalPhysicalMemoryAvailable() - GetSystemResourceSize();
134 // personal heap is implemented.
135 return GetTotalPhysicalMemoryAvailable();
136} 134}
137 135
138u64 Process::GetTotalPhysicalMemoryUsed() const { 136u64 Process::GetTotalPhysicalMemoryUsed() const {
139 return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size; 137 return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size +
138 GetSystemResourceUsage();
140} 139}
141 140
142u64 Process::GetTotalPhysicalMemoryUsedWithoutMmHeap() const { 141u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const {
143 // TODO: Subtract the personal heap size from this when the 142 return GetTotalPhysicalMemoryUsed() - GetSystemResourceUsage();
144 // personal heap is implemented.
145 return GetTotalPhysicalMemoryUsed();
146} 143}
147 144
148void Process::RegisterThread(const Thread* thread) { 145void Process::RegisterThread(const Thread* thread) {
@@ -172,6 +169,7 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
172 program_id = metadata.GetTitleID(); 169 program_id = metadata.GetTitleID();
173 ideal_core = metadata.GetMainThreadCore(); 170 ideal_core = metadata.GetMainThreadCore();
174 is_64bit_process = metadata.Is64BitProgram(); 171 is_64bit_process = metadata.Is64BitProgram();
172 system_resource_size = metadata.GetSystemResourceSize();
175 173
176 vm_manager.Reset(metadata.GetAddressSpaceType()); 174 vm_manager.Reset(metadata.GetAddressSpaceType());
177 175
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 83ea02bee..3196014da 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -168,8 +168,24 @@ public:
168 return capabilities.GetPriorityMask(); 168 return capabilities.GetPriorityMask();
169 } 169 }
170 170
171 u32 IsVirtualMemoryEnabled() const { 171 /// Gets the amount of secure memory to allocate for memory management.
172 return is_virtual_address_memory_enabled; 172 u32 GetSystemResourceSize() const {
173 return system_resource_size;
174 }
175
176 /// Gets the amount of secure memory currently in use for memory management.
177 u32 GetSystemResourceUsage() const {
178 // On hardware, this returns the amount of system resource memory that has
179 // been used by the kernel. This is problematic for yuzu to emulate, because
180 // system resource memory is used for page tables -- and yuzu doesn't really
181 // have a way to calculate how much memory is required for page tables for
182 // the current process at any given time.
183 // TODO: Is this even worth implementing? Games may retrieve this value via
184 // an SDK function that gets used + available system resource size for debug
185 // or diagnostic purposes. However, it seems unlikely that a game would make
186 // decisions based on how much system memory is dedicated to its page tables.
187 // Is returning a value other than zero wise?
188 return 0;
173 } 189 }
174 190
175 /// Whether this process is an AArch64 or AArch32 process. 191 /// Whether this process is an AArch64 or AArch32 process.
@@ -196,15 +212,15 @@ public:
196 u64 GetTotalPhysicalMemoryAvailable() const; 212 u64 GetTotalPhysicalMemoryAvailable() const;
197 213
198 /// Retrieves the total physical memory available to this process in bytes, 214 /// Retrieves the total physical memory available to this process in bytes,
199 /// without the size of the personal heap added to it. 215 /// without the size of the personal system resource heap added to it.
200 u64 GetTotalPhysicalMemoryAvailableWithoutMmHeap() const; 216 u64 GetTotalPhysicalMemoryAvailableWithoutSystemResource() const;
201 217
202 /// Retrieves the total physical memory used by this process in bytes. 218 /// Retrieves the total physical memory used by this process in bytes.
203 u64 GetTotalPhysicalMemoryUsed() const; 219 u64 GetTotalPhysicalMemoryUsed() const;
204 220
205 /// Retrieves the total physical memory used by this process in bytes, 221 /// Retrieves the total physical memory used by this process in bytes,
206 /// without the size of the personal heap added to it. 222 /// without the size of the personal system resource heap added to it.
207 u64 GetTotalPhysicalMemoryUsedWithoutMmHeap() const; 223 u64 GetTotalPhysicalMemoryUsedWithoutSystemResource() const;
208 224
209 /// Gets the list of all threads created with this process as their owner. 225 /// Gets the list of all threads created with this process as their owner.
210 const std::list<const Thread*>& GetThreadList() const { 226 const std::list<const Thread*>& GetThreadList() const {
@@ -298,12 +314,16 @@ private:
298 /// Title ID corresponding to the process 314 /// Title ID corresponding to the process
299 u64 program_id = 0; 315 u64 program_id = 0;
300 316
317 /// Specifies additional memory to be reserved for the process's memory management by the
318 /// system. When this is non-zero, secure memory is allocated and used for page table allocation
319 /// instead of using the normal global page tables/memory block management.
320 u32 system_resource_size = 0;
321
301 /// Resource limit descriptor for this process 322 /// Resource limit descriptor for this process
302 SharedPtr<ResourceLimit> resource_limit; 323 SharedPtr<ResourceLimit> resource_limit;
303 324
304 /// The ideal CPU core for this process, threads are scheduled on this core by default. 325 /// The ideal CPU core for this process, threads are scheduled on this core by default.
305 u8 ideal_core = 0; 326 u8 ideal_core = 0;
306 u32 is_virtual_address_memory_enabled = 0;
307 327
308 /// The Thread Local Storage area is allocated as processes create threads, 328 /// The Thread Local Storage area is allocated as processes create threads,
309 /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part 329 /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 58374f829..a46eed3da 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -736,16 +736,16 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
736 StackRegionBaseAddr = 14, 736 StackRegionBaseAddr = 14,
737 StackRegionSize = 15, 737 StackRegionSize = 15,
738 // 3.0.0+ 738 // 3.0.0+
739 IsVirtualAddressMemoryEnabled = 16, 739 SystemResourceSize = 16,
740 PersonalMmHeapUsage = 17, 740 SystemResourceUsage = 17,
741 TitleId = 18, 741 TitleId = 18,
742 // 4.0.0+ 742 // 4.0.0+
743 PrivilegedProcessId = 19, 743 PrivilegedProcessId = 19,
744 // 5.0.0+ 744 // 5.0.0+
745 UserExceptionContextAddr = 20, 745 UserExceptionContextAddr = 20,
746 // 6.0.0+ 746 // 6.0.0+
747 TotalPhysicalMemoryAvailableWithoutMmHeap = 21, 747 TotalPhysicalMemoryAvailableWithoutSystemResource = 21,
748 TotalPhysicalMemoryUsedWithoutMmHeap = 22, 748 TotalPhysicalMemoryUsedWithoutSystemResource = 22,
749 }; 749 };
750 750
751 const auto info_id_type = static_cast<GetInfoType>(info_id); 751 const auto info_id_type = static_cast<GetInfoType>(info_id);
@@ -763,12 +763,12 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
763 case GetInfoType::StackRegionSize: 763 case GetInfoType::StackRegionSize:
764 case GetInfoType::TotalPhysicalMemoryAvailable: 764 case GetInfoType::TotalPhysicalMemoryAvailable:
765 case GetInfoType::TotalPhysicalMemoryUsed: 765 case GetInfoType::TotalPhysicalMemoryUsed:
766 case GetInfoType::IsVirtualAddressMemoryEnabled: 766 case GetInfoType::SystemResourceSize:
767 case GetInfoType::PersonalMmHeapUsage: 767 case GetInfoType::SystemResourceUsage:
768 case GetInfoType::TitleId: 768 case GetInfoType::TitleId:
769 case GetInfoType::UserExceptionContextAddr: 769 case GetInfoType::UserExceptionContextAddr:
770 case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap: 770 case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
771 case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap: { 771 case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource: {
772 if (info_sub_id != 0) { 772 if (info_sub_id != 0) {
773 return ERR_INVALID_ENUM_VALUE; 773 return ERR_INVALID_ENUM_VALUE;
774 } 774 }
@@ -829,8 +829,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
829 *result = process->GetTotalPhysicalMemoryUsed(); 829 *result = process->GetTotalPhysicalMemoryUsed();
830 return RESULT_SUCCESS; 830 return RESULT_SUCCESS;
831 831
832 case GetInfoType::IsVirtualAddressMemoryEnabled: 832 case GetInfoType::SystemResourceSize:
833 *result = process->IsVirtualMemoryEnabled(); 833 *result = process->GetSystemResourceSize();
834 return RESULT_SUCCESS;
835
836 case GetInfoType::SystemResourceUsage:
837 LOG_WARNING(Kernel_SVC, "(STUBBED) Attempted to query system resource usage");
838 *result = process->GetSystemResourceUsage();
834 return RESULT_SUCCESS; 839 return RESULT_SUCCESS;
835 840
836 case GetInfoType::TitleId: 841 case GetInfoType::TitleId:
@@ -843,12 +848,12 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
843 *result = 0; 848 *result = 0;
844 return RESULT_SUCCESS; 849 return RESULT_SUCCESS;
845 850
846 case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap: 851 case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
847 *result = process->GetTotalPhysicalMemoryAvailable(); 852 *result = process->GetTotalPhysicalMemoryAvailableWithoutSystemResource();
848 return RESULT_SUCCESS; 853 return RESULT_SUCCESS;
849 854
850 case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap: 855 case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource:
851 *result = process->GetTotalPhysicalMemoryUsedWithoutMmHeap(); 856 *result = process->GetTotalPhysicalMemoryUsedWithoutSystemResource();
852 return RESULT_SUCCESS; 857 return RESULT_SUCCESS;
853 858
854 default: 859 default:
@@ -953,6 +958,86 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
953 } 958 }
954} 959}
955 960
961/// Maps memory at a desired address
962static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
963 LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
964
965 if (!Common::Is4KBAligned(addr)) {
966 LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
967 return ERR_INVALID_ADDRESS;
968 }
969
970 if (!Common::Is4KBAligned(size)) {
971 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
972 return ERR_INVALID_SIZE;
973 }
974
975 if (size == 0) {
976 LOG_ERROR(Kernel_SVC, "Size is zero");
977 return ERR_INVALID_SIZE;
978 }
979
980 if (!(addr < addr + size)) {
981 LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
982 return ERR_INVALID_MEMORY_RANGE;
983 }
984
985 Process* const current_process = system.Kernel().CurrentProcess();
986 auto& vm_manager = current_process->VMManager();
987
988 if (current_process->GetSystemResourceSize() == 0) {
989 LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
990 return ERR_INVALID_STATE;
991 }
992
993 if (!vm_manager.IsWithinMapRegion(addr, size)) {
994 LOG_ERROR(Kernel_SVC, "Range not within map region");
995 return ERR_INVALID_MEMORY_RANGE;
996 }
997
998 return vm_manager.MapPhysicalMemory(addr, size);
999}
1000
1001/// Unmaps memory previously mapped via MapPhysicalMemory
1002static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
1003 LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
1004
1005 if (!Common::Is4KBAligned(addr)) {
1006 LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
1007 return ERR_INVALID_ADDRESS;
1008 }
1009
1010 if (!Common::Is4KBAligned(size)) {
1011 LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
1012 return ERR_INVALID_SIZE;
1013 }
1014
1015 if (size == 0) {
1016 LOG_ERROR(Kernel_SVC, "Size is zero");
1017 return ERR_INVALID_SIZE;
1018 }
1019
1020 if (!(addr < addr + size)) {
1021 LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
1022 return ERR_INVALID_MEMORY_RANGE;
1023 }
1024
1025 Process* const current_process = system.Kernel().CurrentProcess();
1026 auto& vm_manager = current_process->VMManager();
1027
1028 if (current_process->GetSystemResourceSize() == 0) {
1029 LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
1030 return ERR_INVALID_STATE;
1031 }
1032
1033 if (!vm_manager.IsWithinMapRegion(addr, size)) {
1034 LOG_ERROR(Kernel_SVC, "Range not within map region");
1035 return ERR_INVALID_MEMORY_RANGE;
1036 }
1037
1038 return vm_manager.UnmapPhysicalMemory(addr, size);
1039}
1040
956/// Sets the thread activity 1041/// Sets the thread activity
957static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) { 1042static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) {
958 LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity); 1043 LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity);
@@ -2310,8 +2395,8 @@ static const FunctionDef SVC_Table[] = {
2310 {0x29, SvcWrap<GetInfo>, "GetInfo"}, 2395 {0x29, SvcWrap<GetInfo>, "GetInfo"},
2311 {0x2A, nullptr, "FlushEntireDataCache"}, 2396 {0x2A, nullptr, "FlushEntireDataCache"},
2312 {0x2B, nullptr, "FlushDataCache"}, 2397 {0x2B, nullptr, "FlushDataCache"},
2313 {0x2C, nullptr, "MapPhysicalMemory"}, 2398 {0x2C, SvcWrap<MapPhysicalMemory>, "MapPhysicalMemory"},
2314 {0x2D, nullptr, "UnmapPhysicalMemory"}, 2399 {0x2D, SvcWrap<UnmapPhysicalMemory>, "UnmapPhysicalMemory"},
2315 {0x2E, nullptr, "GetFutureThreadInfo"}, 2400 {0x2E, nullptr, "GetFutureThreadInfo"},
2316 {0x2F, nullptr, "GetLastThreadInfo"}, 2401 {0x2F, nullptr, "GetLastThreadInfo"},
2317 {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"}, 2402 {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"},
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 865473c6f..c2d8d0dc3 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -32,6 +32,11 @@ void SvcWrap(Core::System& system) {
32 FuncReturn(system, func(system, Param(system, 0)).raw); 32 FuncReturn(system, func(system, Param(system, 0)).raw);
33} 33}
34 34
35template <ResultCode func(Core::System&, u64, u64)>
36void SvcWrap(Core::System& system) {
37 FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw);
38}
39
35template <ResultCode func(Core::System&, u32)> 40template <ResultCode func(Core::System&, u32)>
36void SvcWrap(Core::System& system) { 41void SvcWrap(Core::System& system) {
37 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); 42 FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw);
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 7bc925a5f..4f45fb03b 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -11,6 +11,8 @@
11#include "core/core.h" 11#include "core/core.h"
12#include "core/file_sys/program_metadata.h" 12#include "core/file_sys/program_metadata.h"
13#include "core/hle/kernel/errors.h" 13#include "core/hle/kernel/errors.h"
14#include "core/hle/kernel/process.h"
15#include "core/hle/kernel/resource_limit.h"
14#include "core/hle/kernel/vm_manager.h" 16#include "core/hle/kernel/vm_manager.h"
15#include "core/memory.h" 17#include "core/memory.h"
16#include "core/memory_setup.h" 18#include "core/memory_setup.h"
@@ -48,10 +50,14 @@ bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
48 type != next.type) { 50 type != next.type) {
49 return false; 51 return false;
50 } 52 }
51 if (type == VMAType::AllocatedMemoryBlock && 53 if ((attribute & MemoryAttribute::DeviceMapped) == MemoryAttribute::DeviceMapped) {
52 (backing_block != next.backing_block || offset + size != next.offset)) { 54 // TODO: Can device mapped memory be merged sanely?
55 // Not merging it may cause inaccuracies versus hardware when memory layout is queried.
53 return false; 56 return false;
54 } 57 }
58 if (type == VMAType::AllocatedMemoryBlock) {
59 return true;
60 }
55 if (type == VMAType::BackingMemory && backing_memory + size != next.backing_memory) { 61 if (type == VMAType::BackingMemory && backing_memory + size != next.backing_memory) {
56 return false; 62 return false;
57 } 63 }
@@ -99,7 +105,7 @@ bool VMManager::IsValidHandle(VMAHandle handle) const {
99ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, 105ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
100 std::shared_ptr<std::vector<u8>> block, 106 std::shared_ptr<std::vector<u8>> block,
101 std::size_t offset, u64 size, 107 std::size_t offset, u64 size,
102 MemoryState state) { 108 MemoryState state, VMAPermission perm) {
103 ASSERT(block != nullptr); 109 ASSERT(block != nullptr);
104 ASSERT(offset + size <= block->size()); 110 ASSERT(offset + size <= block->size());
105 111
@@ -109,7 +115,7 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
109 ASSERT(final_vma.size == size); 115 ASSERT(final_vma.size == size);
110 116
111 final_vma.type = VMAType::AllocatedMemoryBlock; 117 final_vma.type = VMAType::AllocatedMemoryBlock;
112 final_vma.permissions = VMAPermission::ReadWrite; 118 final_vma.permissions = perm;
113 final_vma.state = state; 119 final_vma.state = state;
114 final_vma.backing_block = std::move(block); 120 final_vma.backing_block = std::move(block);
115 final_vma.offset = offset; 121 final_vma.offset = offset;
@@ -288,6 +294,166 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
288 return MakeResult<VAddr>(heap_region_base); 294 return MakeResult<VAddr>(heap_region_base);
289} 295}
290 296
297ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) {
298 const auto end_addr = target + size;
299 const auto last_addr = end_addr - 1;
300 VAddr cur_addr = target;
301
302 ResultCode result = RESULT_SUCCESS;
303
304 // Check how much memory we've already mapped.
305 const auto mapped_size_result = SizeOfAllocatedVMAsInRange(target, size);
306 if (mapped_size_result.Failed()) {
307 return mapped_size_result.Code();
308 }
309
310 // If we've already mapped the desired amount, return early.
311 const std::size_t mapped_size = *mapped_size_result;
312 if (mapped_size == size) {
313 return RESULT_SUCCESS;
314 }
315
316 // Check that we can map the memory we want.
317 const auto res_limit = system.CurrentProcess()->GetResourceLimit();
318 const u64 physmem_remaining = res_limit->GetMaxResourceValue(ResourceType::PhysicalMemory) -
319 res_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory);
320 if (physmem_remaining < (size - mapped_size)) {
321 return ERR_RESOURCE_LIMIT_EXCEEDED;
322 }
323
324 // Keep track of the memory regions we unmap.
325 std::vector<std::pair<u64, u64>> mapped_regions;
326
327 // Iterate, trying to map memory.
328 {
329 cur_addr = target;
330
331 auto iter = FindVMA(target);
332 ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end");
333
334 while (true) {
335 const auto& vma = iter->second;
336 const auto vma_start = vma.base;
337 const auto vma_end = vma_start + vma.size;
338 const auto vma_last = vma_end - 1;
339
340 // Map the memory block
341 const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr);
342 if (vma.state == MemoryState::Unmapped) {
343 const auto map_res =
344 MapMemoryBlock(cur_addr, std::make_shared<std::vector<u8>>(map_size, 0), 0,
345 map_size, MemoryState::Heap, VMAPermission::ReadWrite);
346 result = map_res.Code();
347 if (result.IsError()) {
348 break;
349 }
350
351 mapped_regions.emplace_back(cur_addr, map_size);
352 }
353
354 // Break once we hit the end of the range.
355 if (last_addr <= vma_last) {
356 break;
357 }
358
359 // Advance to the next block.
360 cur_addr = vma_end;
361 iter = FindVMA(cur_addr);
362 ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end");
363 }
364 }
365
366 // If we failed, unmap memory.
367 if (result.IsError()) {
368 for (const auto [unmap_address, unmap_size] : mapped_regions) {
369 ASSERT_MSG(UnmapRange(unmap_address, unmap_size).IsSuccess(),
370 "MapPhysicalMemory un-map on error");
371 }
372
373 return result;
374 }
375
376 // Update amount of mapped physical memory.
377 physical_memory_mapped += size - mapped_size;
378
379 return RESULT_SUCCESS;
380}
381
382ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) {
383 const auto end_addr = target + size;
384 const auto last_addr = end_addr - 1;
385 VAddr cur_addr = target;
386
387 ResultCode result = RESULT_SUCCESS;
388
389 // Check how much memory is currently mapped.
390 const auto mapped_size_result = SizeOfUnmappablePhysicalMemoryInRange(target, size);
391 if (mapped_size_result.Failed()) {
392 return mapped_size_result.Code();
393 }
394
395 // If we've already unmapped all the memory, return early.
396 const std::size_t mapped_size = *mapped_size_result;
397 if (mapped_size == 0) {
398 return RESULT_SUCCESS;
399 }
400
401 // Keep track of the memory regions we unmap.
402 std::vector<std::pair<u64, u64>> unmapped_regions;
403
404 // Try to unmap regions.
405 {
406 cur_addr = target;
407
408 auto iter = FindVMA(target);
409 ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end");
410
411 while (true) {
412 const auto& vma = iter->second;
413 const auto vma_start = vma.base;
414 const auto vma_end = vma_start + vma.size;
415 const auto vma_last = vma_end - 1;
416
417 // Unmap the memory block
418 const auto unmap_size = std::min(end_addr - cur_addr, vma_end - cur_addr);
419 if (vma.state == MemoryState::Heap) {
420 result = UnmapRange(cur_addr, unmap_size);
421 if (result.IsError()) {
422 break;
423 }
424
425 unmapped_regions.emplace_back(cur_addr, unmap_size);
426 }
427
428 // Break once we hit the end of the range.
429 if (last_addr <= vma_last) {
430 break;
431 }
432
433 // Advance to the next block.
434 cur_addr = vma_end;
435 iter = FindVMA(cur_addr);
436 ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end");
437 }
438 }
439
440 // If we failed, re-map regions.
441 // TODO: Preserve memory contents?
442 if (result.IsError()) {
443 for (const auto [map_address, map_size] : unmapped_regions) {
444 const auto remap_res =
445 MapMemoryBlock(map_address, std::make_shared<std::vector<u8>>(map_size, 0), 0,
446 map_size, MemoryState::Heap, VMAPermission::None);
447 ASSERT_MSG(remap_res.Succeeded(), "UnmapPhysicalMemory re-map on error");
448 }
449 }
450
451 // Update mapped amount
452 physical_memory_mapped -= mapped_size;
453
454 return RESULT_SUCCESS;
455}
456
291ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) { 457ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) {
292 constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped; 458 constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped;
293 const auto src_check_result = CheckRangeState( 459 const auto src_check_result = CheckRangeState(
@@ -435,7 +601,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem
435 // Protect mirror with permissions from old region 601 // Protect mirror with permissions from old region
436 Reprotect(new_vma, vma->second.permissions); 602 Reprotect(new_vma, vma->second.permissions);
437 // Remove permissions from old region 603 // Remove permissions from old region
438 Reprotect(vma, VMAPermission::None); 604 ReprotectRange(src_addr, size, VMAPermission::None);
439 605
440 return RESULT_SUCCESS; 606 return RESULT_SUCCESS;
441} 607}
@@ -568,14 +734,14 @@ VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) {
568VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) { 734VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) {
569 const VMAIter next_vma = std::next(iter); 735 const VMAIter next_vma = std::next(iter);
570 if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) { 736 if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) {
571 iter->second.size += next_vma->second.size; 737 MergeAdjacentVMA(iter->second, next_vma->second);
572 vma_map.erase(next_vma); 738 vma_map.erase(next_vma);
573 } 739 }
574 740
575 if (iter != vma_map.begin()) { 741 if (iter != vma_map.begin()) {
576 VMAIter prev_vma = std::prev(iter); 742 VMAIter prev_vma = std::prev(iter);
577 if (prev_vma->second.CanBeMergedWith(iter->second)) { 743 if (prev_vma->second.CanBeMergedWith(iter->second)) {
578 prev_vma->second.size += iter->second.size; 744 MergeAdjacentVMA(prev_vma->second, iter->second);
579 vma_map.erase(iter); 745 vma_map.erase(iter);
580 iter = prev_vma; 746 iter = prev_vma;
581 } 747 }
@@ -584,6 +750,38 @@ VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) {
584 return iter; 750 return iter;
585} 751}
586 752
753void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right) {
754 ASSERT(left.CanBeMergedWith(right));
755
756 // Always merge allocated memory blocks, even when they don't share the same backing block.
757 if (left.type == VMAType::AllocatedMemoryBlock &&
758 (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) {
759 // Check if we can save work.
760 if (left.offset == 0 && left.size == left.backing_block->size()) {
761 // Fast case: left is an entire backing block.
762 left.backing_block->insert(left.backing_block->end(),
763 right.backing_block->begin() + right.offset,
764 right.backing_block->begin() + right.offset + right.size);
765 } else {
766 // Slow case: make a new memory block for left and right.
767 auto new_memory = std::make_shared<std::vector<u8>>();
768 new_memory->insert(new_memory->end(), left.backing_block->begin() + left.offset,
769 left.backing_block->begin() + left.offset + left.size);
770 new_memory->insert(new_memory->end(), right.backing_block->begin() + right.offset,
771 right.backing_block->begin() + right.offset + right.size);
772 left.backing_block = new_memory;
773 left.offset = 0;
774 }
775
776 // Page table update is needed, because backing memory changed.
777 left.size += right.size;
778 UpdatePageTableForVMA(left);
779 } else {
780 // Just update the size.
781 left.size += right.size;
782 }
783}
784
587void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { 785void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
588 switch (vma.type) { 786 switch (vma.type) {
589 case VMAType::Free: 787 case VMAType::Free:
@@ -758,6 +956,84 @@ VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, Memo
758 std::make_tuple(initial_state, initial_permissions, initial_attributes & ~ignore_mask)); 956 std::make_tuple(initial_state, initial_permissions, initial_attributes & ~ignore_mask));
759} 957}
760 958
959ResultVal<std::size_t> VMManager::SizeOfAllocatedVMAsInRange(VAddr address,
960 std::size_t size) const {
961 const VAddr end_addr = address + size;
962 const VAddr last_addr = end_addr - 1;
963 std::size_t mapped_size = 0;
964
965 VAddr cur_addr = address;
966 auto iter = FindVMA(cur_addr);
967 ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end");
968
969 while (true) {
970 const auto& vma = iter->second;
971 const VAddr vma_start = vma.base;
972 const VAddr vma_end = vma_start + vma.size;
973 const VAddr vma_last = vma_end - 1;
974
975 // Add size if relevant.
976 if (vma.state != MemoryState::Unmapped) {
977 mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr);
978 }
979
980 // Break once we hit the end of the range.
981 if (last_addr <= vma_last) {
982 break;
983 }
984
985 // Advance to the next block.
986 cur_addr = vma_end;
987 iter = std::next(iter);
988 ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end");
989 }
990
991 return MakeResult(mapped_size);
992}
993
994ResultVal<std::size_t> VMManager::SizeOfUnmappablePhysicalMemoryInRange(VAddr address,
995 std::size_t size) const {
996 const VAddr end_addr = address + size;
997 const VAddr last_addr = end_addr - 1;
998 std::size_t mapped_size = 0;
999
1000 VAddr cur_addr = address;
1001 auto iter = FindVMA(cur_addr);
1002 ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end");
1003
1004 while (true) {
1005 const auto& vma = iter->second;
1006 const auto vma_start = vma.base;
1007 const auto vma_end = vma_start + vma.size;
1008 const auto vma_last = vma_end - 1;
1009 const auto state = vma.state;
1010 const auto attr = vma.attribute;
1011
1012 // Memory within region must be free or mapped heap.
1013 if (!((state == MemoryState::Heap && attr == MemoryAttribute::None) ||
1014 (state == MemoryState::Unmapped))) {
1015 return ERR_INVALID_ADDRESS_STATE;
1016 }
1017
1018 // Add size if relevant.
1019 if (state != MemoryState::Unmapped) {
1020 mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr);
1021 }
1022
1023 // Break once we hit the end of the range.
1024 if (last_addr <= vma_last) {
1025 break;
1026 }
1027
1028 // Advance to the next block.
1029 cur_addr = vma_end;
1030 iter = std::next(iter);
1031 ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end");
1032 }
1033
1034 return MakeResult(mapped_size);
1035}
1036
761u64 VMManager::GetTotalPhysicalMemoryAvailable() const { 1037u64 VMManager::GetTotalPhysicalMemoryAvailable() const {
762 LOG_WARNING(Kernel, "(STUBBED) called"); 1038 LOG_WARNING(Kernel, "(STUBBED) called");
763 return 0xF8000000; 1039 return 0xF8000000;
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 9fe6ac3f4..0aecb7499 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -349,7 +349,8 @@ public:
349 * @param state MemoryState tag to attach to the VMA. 349 * @param state MemoryState tag to attach to the VMA.
350 */ 350 */
351 ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block, 351 ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block,
352 std::size_t offset, u64 size, MemoryState state); 352 std::size_t offset, u64 size, MemoryState state,
353 VMAPermission perm = VMAPermission::ReadWrite);
353 354
354 /** 355 /**
355 * Maps an unmanaged host memory pointer at a given address. 356 * Maps an unmanaged host memory pointer at a given address.
@@ -450,6 +451,34 @@ public:
450 /// 451 ///
451 ResultVal<VAddr> SetHeapSize(u64 size); 452 ResultVal<VAddr> SetHeapSize(u64 size);
452 453
454 /// Maps memory at a given address.
455 ///
456 /// @param target The virtual address to map memory at.
457 /// @param size The amount of memory to map.
458 ///
459 /// @note The destination address must lie within the Map region.
460 ///
461 /// @note This function requires that SystemResourceSize be non-zero,
462 /// however, this is just because if it were not then the
463 /// resulting page tables could be exploited on hardware by
464 /// a malicious program. SystemResource usage does not need
465 /// to be explicitly checked or updated here.
466 ResultCode MapPhysicalMemory(VAddr target, u64 size);
467
468 /// Unmaps memory at a given address.
469 ///
470 /// @param target The virtual address to unmap memory at.
471 /// @param size The amount of memory to unmap.
472 ///
473 /// @note The destination address must lie within the Map region.
474 ///
475 /// @note This function requires that SystemResourceSize be non-zero,
476 /// however, this is just because if it were not then the
477 /// resulting page tables could be exploited on hardware by
478 /// a malicious program. SystemResource usage does not need
479 /// to be explicitly checked or updated here.
480 ResultCode UnmapPhysicalMemory(VAddr target, u64 size);
481
453 /// Maps a region of memory as code memory. 482 /// Maps a region of memory as code memory.
454 /// 483 ///
455 /// @param dst_address The base address of the region to create the aliasing memory region. 484 /// @param dst_address The base address of the region to create the aliasing memory region.
@@ -657,6 +686,11 @@ private:
657 */ 686 */
658 VMAIter MergeAdjacent(VMAIter vma); 687 VMAIter MergeAdjacent(VMAIter vma);
659 688
689 /**
690 * Merges two adjacent VMAs.
691 */
692 void MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right);
693
660 /// Updates the pages corresponding to this VMA so they match the VMA's attributes. 694 /// Updates the pages corresponding to this VMA so they match the VMA's attributes.
661 void UpdatePageTableForVMA(const VirtualMemoryArea& vma); 695 void UpdatePageTableForVMA(const VirtualMemoryArea& vma);
662 696
@@ -701,6 +735,13 @@ private:
701 MemoryAttribute attribute_mask, MemoryAttribute attribute, 735 MemoryAttribute attribute_mask, MemoryAttribute attribute,
702 MemoryAttribute ignore_mask) const; 736 MemoryAttribute ignore_mask) const;
703 737
738 /// Gets the amount of memory currently mapped (state != Unmapped) in a range.
739 ResultVal<std::size_t> SizeOfAllocatedVMAsInRange(VAddr address, std::size_t size) const;
740
741 /// Gets the amount of memory unmappable by UnmapPhysicalMemory in a range.
742 ResultVal<std::size_t> SizeOfUnmappablePhysicalMemoryInRange(VAddr address,
743 std::size_t size) const;
744
704 /** 745 /**
705 * A map covering the entirety of the managed address space, keyed by the `base` field of each 746 * A map covering the entirety of the managed address space, keyed by the `base` field of each
706 * VMA. It must always be modified by splitting or merging VMAs, so that the invariant 747 * VMA. It must always be modified by splitting or merging VMAs, so that the invariant
@@ -742,6 +783,11 @@ private:
742 // end of the range. This is essentially 'base_address + current_size'. 783 // end of the range. This is essentially 'base_address + current_size'.
743 VAddr heap_end = 0; 784 VAddr heap_end = 0;
744 785
786 // The current amount of memory mapped via MapPhysicalMemory.
787 // This is used here (and in Nintendo's kernel) only for debugging, and does not impact
788 // any behavior.
789 u64 physical_memory_mapped = 0;
790
745 Core::System& system; 791 Core::System& system;
746}; 792};
747} // namespace Kernel 793} // namespace Kernel
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index cd32c65d3..7c18c27b3 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,4 +1,5 @@
1add_library(video_core STATIC 1add_library(video_core STATIC
2 buffer_cache.h
2 dma_pusher.cpp 3 dma_pusher.cpp
3 dma_pusher.h 4 dma_pusher.h
4 debug_utils/debug_utils.cpp 5 debug_utils/debug_utils.cpp
@@ -43,8 +44,6 @@ add_library(video_core STATIC
43 renderer_opengl/gl_device.h 44 renderer_opengl/gl_device.h
44 renderer_opengl/gl_framebuffer_cache.cpp 45 renderer_opengl/gl_framebuffer_cache.cpp
45 renderer_opengl/gl_framebuffer_cache.h 46 renderer_opengl/gl_framebuffer_cache.h
46 renderer_opengl/gl_global_cache.cpp
47 renderer_opengl/gl_global_cache.h
48 renderer_opengl/gl_rasterizer.cpp 47 renderer_opengl/gl_rasterizer.cpp
49 renderer_opengl/gl_rasterizer.h 48 renderer_opengl/gl_rasterizer.h
50 renderer_opengl/gl_resource_manager.cpp 49 renderer_opengl/gl_resource_manager.cpp
diff --git a/src/video_core/buffer_cache.h b/src/video_core/buffer_cache.h
new file mode 100644
index 000000000..6f868b8b4
--- /dev/null
+++ b/src/video_core/buffer_cache.h
@@ -0,0 +1,299 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <memory>
9#include <mutex>
10#include <unordered_map>
11#include <unordered_set>
12#include <utility>
13#include <vector>
14
15#include "common/alignment.h"
16#include "common/common_types.h"
17#include "core/core.h"
18#include "video_core/memory_manager.h"
19#include "video_core/rasterizer_cache.h"
20
21namespace VideoCore {
22class RasterizerInterface;
23}
24
25namespace VideoCommon {
26
27template <typename BufferStorageType>
28class CachedBuffer final : public RasterizerCacheObject {
29public:
30 explicit CachedBuffer(VAddr cpu_addr, u8* host_ptr)
31 : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {}
32 ~CachedBuffer() override = default;
33
34 VAddr GetCpuAddr() const override {
35 return cpu_addr;
36 }
37
38 std::size_t GetSizeInBytes() const override {
39 return size;
40 }
41
42 u8* GetWritableHostPtr() const {
43 return host_ptr;
44 }
45
46 std::size_t GetSize() const {
47 return size;
48 }
49
50 std::size_t GetCapacity() const {
51 return capacity;
52 }
53
54 bool IsInternalized() const {
55 return is_internal;
56 }
57
58 const BufferStorageType& GetBuffer() const {
59 return buffer;
60 }
61
62 void SetSize(std::size_t new_size) {
63 size = new_size;
64 }
65
66 void SetInternalState(bool is_internal_) {
67 is_internal = is_internal_;
68 }
69
70 BufferStorageType ExchangeBuffer(BufferStorageType buffer_, std::size_t new_capacity) {
71 capacity = new_capacity;
72 std::swap(buffer, buffer_);
73 return buffer_;
74 }
75
76private:
77 u8* host_ptr{};
78 VAddr cpu_addr{};
79 std::size_t size{};
80 std::size_t capacity{};
81 bool is_internal{};
82 BufferStorageType buffer;
83};
84
85template <typename BufferStorageType, typename BufferType, typename StreamBuffer>
86class BufferCache : public RasterizerCache<std::shared_ptr<CachedBuffer<BufferStorageType>>> {
87public:
88 using Buffer = std::shared_ptr<CachedBuffer<BufferStorageType>>;
89 using BufferInfo = std::pair<const BufferType*, u64>;
90
91 explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
92 std::unique_ptr<StreamBuffer> stream_buffer)
93 : RasterizerCache<Buffer>{rasterizer}, system{system},
94 stream_buffer{std::move(stream_buffer)}, stream_buffer_handle{
95 this->stream_buffer->GetHandle()} {}
96 ~BufferCache() = default;
97
98 void Unregister(const Buffer& entry) override {
99 std::lock_guard lock{RasterizerCache<Buffer>::mutex};
100 if (entry->IsInternalized()) {
101 internalized_entries.erase(entry->GetCacheAddr());
102 }
103 ReserveBuffer(entry);
104 RasterizerCache<Buffer>::Unregister(entry);
105 }
106
107 void TickFrame() {
108 marked_for_destruction_index =
109 (marked_for_destruction_index + 1) % marked_for_destruction_ring_buffer.size();
110 MarkedForDestruction().clear();
111 }
112
113 BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
114 bool internalize = false, bool is_written = false) {
115 std::lock_guard lock{RasterizerCache<Buffer>::mutex};
116
117 auto& memory_manager = system.GPU().MemoryManager();
118 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
119 if (!host_ptr) {
120 return {GetEmptyBuffer(size), 0};
121 }
122 const auto cache_addr = ToCacheAddr(host_ptr);
123
124 // Cache management is a big overhead, so only cache entries with a given size.
125 // TODO: Figure out which size is the best for given games.
126 constexpr std::size_t max_stream_size = 0x800;
127 if (!internalize && size < max_stream_size &&
128 internalized_entries.find(cache_addr) == internalized_entries.end()) {
129 return StreamBufferUpload(host_ptr, size, alignment);
130 }
131
132 auto entry = RasterizerCache<Buffer>::TryGet(cache_addr);
133 if (!entry) {
134 return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written);
135 }
136
137 if (entry->GetSize() < size) {
138 IncreaseBufferSize(entry, size);
139 }
140 if (is_written) {
141 entry->MarkAsModified(true, *this);
142 }
143 return {ToHandle(entry->GetBuffer()), 0};
144 }
145
146 /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
147 BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
148 std::size_t alignment = 4) {
149 std::lock_guard lock{RasterizerCache<Buffer>::mutex};
150 return StreamBufferUpload(raw_pointer, size, alignment);
151 }
152
153 void Map(std::size_t max_size) {
154 std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
155 buffer_offset = buffer_offset_base;
156 }
157
158 /// Finishes the upload stream, returns true on bindings invalidation.
159 bool Unmap() {
160 stream_buffer->Unmap(buffer_offset - buffer_offset_base);
161 return std::exchange(invalidated, false);
162 }
163
164 virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0;
165
166protected:
167 void FlushObjectInner(const Buffer& entry) override {
168 DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr());
169 }
170
171 virtual BufferStorageType CreateBuffer(std::size_t size) = 0;
172
173 virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0;
174
175 virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset,
176 std::size_t size, const u8* data) = 0;
177
178 virtual void DownloadBufferData(const BufferStorageType& buffer, std::size_t offset,
179 std::size_t size, u8* data) = 0;
180
181 virtual void CopyBufferData(const BufferStorageType& src, const BufferStorageType& dst,
182 std::size_t src_offset, std::size_t dst_offset,
183 std::size_t size) = 0;
184
185private:
186 BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
187 std::size_t alignment) {
188 AlignBuffer(alignment);
189 const std::size_t uploaded_offset = buffer_offset;
190 std::memcpy(buffer_ptr, raw_pointer, size);
191
192 buffer_ptr += size;
193 buffer_offset += size;
194 return {&stream_buffer_handle, uploaded_offset};
195 }
196
197 BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size,
198 bool internalize, bool is_written) {
199 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
200 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
201 ASSERT(cpu_addr);
202
203 auto entry = GetUncachedBuffer(*cpu_addr, host_ptr);
204 entry->SetSize(size);
205 entry->SetInternalState(internalize);
206 RasterizerCache<Buffer>::Register(entry);
207
208 if (internalize) {
209 internalized_entries.emplace(ToCacheAddr(host_ptr));
210 }
211 if (is_written) {
212 entry->MarkAsModified(true, *this);
213 }
214
215 if (entry->GetCapacity() < size) {
216 MarkedForDestruction().push_back(entry->ExchangeBuffer(CreateBuffer(size), size));
217 }
218
219 UploadBufferData(entry->GetBuffer(), 0, size, host_ptr);
220 return {ToHandle(entry->GetBuffer()), 0};
221 }
222
223 void IncreaseBufferSize(Buffer& entry, std::size_t new_size) {
224 const std::size_t old_size = entry->GetSize();
225 if (entry->GetCapacity() < new_size) {
226 const auto& old_buffer = entry->GetBuffer();
227 auto new_buffer = CreateBuffer(new_size);
228
229 // Copy bits from the old buffer to the new buffer.
230 CopyBufferData(old_buffer, new_buffer, 0, 0, old_size);
231 MarkedForDestruction().push_back(
232 entry->ExchangeBuffer(std::move(new_buffer), new_size));
233
234 // This buffer could have been used
235 invalidated = true;
236 }
237 // Upload the new bits.
238 const std::size_t size_diff = new_size - old_size;
239 UploadBufferData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size);
240
241 // Update entry's size in the object and in the cache.
242 Unregister(entry);
243
244 entry->SetSize(new_size);
245 RasterizerCache<Buffer>::Register(entry);
246 }
247
248 Buffer GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) {
249 if (auto entry = TryGetReservedBuffer(host_ptr)) {
250 return entry;
251 }
252 return std::make_shared<CachedBuffer<BufferStorageType>>(cpu_addr, host_ptr);
253 }
254
255 Buffer TryGetReservedBuffer(u8* host_ptr) {
256 const auto it = buffer_reserve.find(ToCacheAddr(host_ptr));
257 if (it == buffer_reserve.end()) {
258 return {};
259 }
260 auto& reserve = it->second;
261 auto entry = reserve.back();
262 reserve.pop_back();
263 return entry;
264 }
265
266 void ReserveBuffer(Buffer entry) {
267 buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry));
268 }
269
270 void AlignBuffer(std::size_t alignment) {
271 // Align the offset, not the mapped pointer
272 const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment);
273 buffer_ptr += offset_aligned - buffer_offset;
274 buffer_offset = offset_aligned;
275 }
276
277 std::vector<BufferStorageType>& MarkedForDestruction() {
278 return marked_for_destruction_ring_buffer[marked_for_destruction_index];
279 }
280
281 Core::System& system;
282
283 std::unique_ptr<StreamBuffer> stream_buffer;
284 BufferType stream_buffer_handle{};
285
286 bool invalidated = false;
287
288 u8* buffer_ptr = nullptr;
289 u64 buffer_offset = 0;
290 u64 buffer_offset_base = 0;
291
292 std::size_t marked_for_destruction_index = 0;
293 std::array<std::vector<BufferStorageType>, 4> marked_for_destruction_ring_buffer;
294
295 std::unordered_set<CacheAddr> internalized_entries;
296 std::unordered_map<CacheAddr, std::vector<Buffer>> buffer_reserve;
297};
298
299} // namespace VideoCommon
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 13e314944..8d15c8a48 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -67,6 +67,7 @@ public:
67 static constexpr std::size_t MaxShaderStage = 5; 67 static constexpr std::size_t MaxShaderStage = 5;
68 // Maximum number of const buffers per shader stage. 68 // Maximum number of const buffers per shader stage.
69 static constexpr std::size_t MaxConstBuffers = 18; 69 static constexpr std::size_t MaxConstBuffers = 18;
70 static constexpr std::size_t MaxConstBufferSize = 0x10000;
70 71
71 enum class QueryMode : u32 { 72 enum class QueryMode : u32 {
72 Write = 0, 73 Write = 0,
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c3055602b..79d469b88 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -78,7 +78,7 @@ union Attribute {
78 constexpr explicit Attribute(u64 value) : value(value) {} 78 constexpr explicit Attribute(u64 value) : value(value) {}
79 79
80 enum class Index : u64 { 80 enum class Index : u64 {
81 PointSize = 6, 81 LayerViewportPointSize = 6,
82 Position = 7, 82 Position = 7,
83 Attribute_0 = 8, 83 Attribute_0 = 8,
84 Attribute_31 = 39, 84 Attribute_31 = 39,
@@ -1278,6 +1278,7 @@ union Instruction {
1278 union { 1278 union {
1279 BitField<49, 1, u64> nodep_flag; 1279 BitField<49, 1, u64> nodep_flag;
1280 BitField<53, 4, u64> texture_info; 1280 BitField<53, 4, u64> texture_info;
1281 BitField<59, 1, u64> fp32_flag;
1281 1282
1282 TextureType GetTextureType() const { 1283 TextureType GetTextureType() const {
1283 // The TLDS instruction has a weird encoding for the texture type. 1284 // The TLDS instruction has a weird encoding for the texture type.
@@ -1776,7 +1777,7 @@ private:
1776 INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), 1777 INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
1777 INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), 1778 INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
1778 INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), 1779 INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
1779 INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), 1780 INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
1780 INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), 1781 INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
1781 INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), 1782 INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
1782 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), 1783 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 52706505b..1b4975498 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -31,7 +31,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
31 31
32GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { 32GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} {
33 auto& rasterizer{renderer.Rasterizer()}; 33 auto& rasterizer{renderer.Rasterizer()};
34 memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer); 34 memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
35 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); 35 dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
36 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); 36 maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
37 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); 37 fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index c766ed692..9f59a2dc1 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -4,14 +4,18 @@
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "common/microprofile.h"
7#include "video_core/engines/maxwell_3d.h" 8#include "video_core/engines/maxwell_3d.h"
8#include "video_core/macro_interpreter.h" 9#include "video_core/macro_interpreter.h"
9 10
11MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
12
10namespace Tegra { 13namespace Tegra {
11 14
12MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} 15MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
13 16
14void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { 17void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
18 MICROPROFILE_SCOPE(MacroInterp);
15 Reset(); 19 Reset();
16 registers[1] = parameters[0]; 20 registers[1] = parameters[0];
17 this->parameters = std::move(parameters); 21 this->parameters = std::move(parameters);
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 322453116..bffae940c 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -5,13 +5,17 @@
5#include "common/alignment.h" 5#include "common/alignment.h"
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/core.h"
9#include "core/hle/kernel/process.h"
10#include "core/hle/kernel/vm_manager.h"
8#include "core/memory.h" 11#include "core/memory.h"
9#include "video_core/memory_manager.h" 12#include "video_core/memory_manager.h"
10#include "video_core/rasterizer_interface.h" 13#include "video_core/rasterizer_interface.h"
11 14
12namespace Tegra { 15namespace Tegra {
13 16
14MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} { 17MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
18 : rasterizer{rasterizer}, system{system} {
15 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); 19 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
16 std::fill(page_table.attributes.begin(), page_table.attributes.end(), 20 std::fill(page_table.attributes.begin(), page_table.attributes.end(),
17 Common::PageType::Unmapped); 21 Common::PageType::Unmapped);
@@ -49,6 +53,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
49 const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; 53 const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)};
50 54
51 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); 55 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
56 ASSERT(system.CurrentProcess()
57 ->VMManager()
58 .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped,
59 Kernel::MemoryAttribute::DeviceMapped)
60 .IsSuccess());
52 61
53 return gpu_addr; 62 return gpu_addr;
54} 63}
@@ -59,7 +68,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size)
59 const u64 aligned_size{Common::AlignUp(size, page_size)}; 68 const u64 aligned_size{Common::AlignUp(size, page_size)};
60 69
61 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); 70 MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr);
62 71 ASSERT(system.CurrentProcess()
72 ->VMManager()
73 .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped,
74 Kernel::MemoryAttribute::DeviceMapped)
75 .IsSuccess());
63 return gpu_addr; 76 return gpu_addr;
64} 77}
65 78
@@ -68,9 +81,16 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
68 81
69 const u64 aligned_size{Common::AlignUp(size, page_size)}; 82 const u64 aligned_size{Common::AlignUp(size, page_size)};
70 const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; 83 const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
84 const auto cpu_addr = GpuToCpuAddress(gpu_addr);
85 ASSERT(cpu_addr);
71 86
72 rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); 87 rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
73 UnmapRange(gpu_addr, aligned_size); 88 UnmapRange(gpu_addr, aligned_size);
89 ASSERT(system.CurrentProcess()
90 ->VMManager()
91 .SetMemoryAttribute(cpu_addr.value(), size, Kernel::MemoryAttribute::DeviceMapped,
92 Kernel::MemoryAttribute::None)
93 .IsSuccess());
74 94
75 return gpu_addr; 95 return gpu_addr;
76} 96}
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 43a84bd52..aea010087 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -14,6 +14,10 @@ namespace VideoCore {
14class RasterizerInterface; 14class RasterizerInterface;
15} 15}
16 16
17namespace Core {
18class System;
19}
20
17namespace Tegra { 21namespace Tegra {
18 22
19/** 23/**
@@ -47,7 +51,7 @@ struct VirtualMemoryArea {
47 51
48class MemoryManager final { 52class MemoryManager final {
49public: 53public:
50 explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer); 54 explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
51 ~MemoryManager(); 55 ~MemoryManager();
52 56
53 GPUVAddr AllocateSpace(u64 size, u64 align); 57 GPUVAddr AllocateSpace(u64 size, u64 align);
@@ -173,6 +177,8 @@ private:
173 Common::PageTable page_table{page_bits}; 177 Common::PageTable page_table{page_bits};
174 VMAMap vma_map; 178 VMAMap vma_map;
175 VideoCore::RasterizerInterface& rasterizer; 179 VideoCore::RasterizerInterface& rasterizer;
180
181 Core::System& system;
176}; 182};
177 183
178} // namespace Tegra 184} // namespace Tegra
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 5ee4f8e8e..2b7367568 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -47,6 +47,9 @@ public:
47 /// and invalidated 47 /// and invalidated
48 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; 48 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
49 49
50 /// Notify rasterizer that a frame is about to finish
51 virtual void TickFrame() = 0;
52
50 /// Attempt to use a faster method to perform a surface copy 53 /// Attempt to use a faster method to perform a surface copy
51 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 54 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
52 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 55 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 2b9bd142e..2a9b523f5 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -2,103 +2,57 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring>
6#include <memory> 5#include <memory>
7 6
8#include "common/alignment.h" 7#include <glad/glad.h>
9#include "core/core.h" 8
10#include "video_core/memory_manager.h" 9#include "common/assert.h"
11#include "video_core/renderer_opengl/gl_buffer_cache.h" 10#include "video_core/renderer_opengl/gl_buffer_cache.h"
12#include "video_core/renderer_opengl/gl_rasterizer.h" 11#include "video_core/renderer_opengl/gl_rasterizer.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h"
13 13
14namespace OpenGL { 14namespace OpenGL {
15 15
16CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, 16OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
17 std::size_t alignment, u8* host_ptr) 17 std::size_t stream_size)
18 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, 18 : VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer>{
19 alignment{alignment} {} 19 rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {}
20
21OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
22 : RasterizerCache{rasterizer}, stream_buffer(size, true) {}
23
24GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment,
25 bool cache) {
26 std::lock_guard lock{mutex};
27 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
28
29 // Cache management is a big overhead, so only cache entries with a given size.
30 // TODO: Figure out which size is the best for given games.
31 cache &= size >= 2048;
32
33 const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
34 if (cache) {
35 auto entry = TryGet(host_ptr);
36 if (entry) {
37 if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
38 return entry->GetOffset();
39 }
40 Unregister(entry);
41 }
42 }
43 20
44 AlignBuffer(alignment); 21OGLBufferCache::~OGLBufferCache() = default;
45 const GLintptr uploaded_offset = buffer_offset;
46 22
47 if (!host_ptr) { 23OGLBuffer OGLBufferCache::CreateBuffer(std::size_t size) {
48 return uploaded_offset; 24 OGLBuffer buffer;
49 } 25 buffer.Create();
50 26 glNamedBufferData(buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
51 std::memcpy(buffer_ptr, host_ptr, size); 27 return buffer;
52 buffer_ptr += size;
53 buffer_offset += size;
54
55 if (cache) {
56 auto entry = std::make_shared<CachedBufferEntry>(
57 *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr);
58 Register(entry);
59 }
60
61 return uploaded_offset;
62} 28}
63 29
64GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size, 30const GLuint* OGLBufferCache::ToHandle(const OGLBuffer& buffer) {
65 std::size_t alignment) { 31 return &buffer.handle;
66 std::lock_guard lock{mutex};
67 AlignBuffer(alignment);
68 std::memcpy(buffer_ptr, raw_pointer, size);
69 const GLintptr uploaded_offset = buffer_offset;
70
71 buffer_ptr += size;
72 buffer_offset += size;
73 return uploaded_offset;
74} 32}
75 33
76bool OGLBufferCache::Map(std::size_t max_size) { 34const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) {
77 bool invalidate; 35 static const GLuint null_buffer = 0;
78 std::tie(buffer_ptr, buffer_offset_base, invalidate) = 36 return &null_buffer;
79 stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
80 buffer_offset = buffer_offset_base;
81
82 if (invalidate) {
83 InvalidateAll();
84 }
85 return invalidate;
86} 37}
87 38
88void OGLBufferCache::Unmap() { 39void OGLBufferCache::UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size,
89 stream_buffer.Unmap(buffer_offset - buffer_offset_base); 40 const u8* data) {
41 glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
42 static_cast<GLsizeiptr>(size), data);
90} 43}
91 44
92GLuint OGLBufferCache::GetHandle() const { 45void OGLBufferCache::DownloadBufferData(const OGLBuffer& buffer, std::size_t offset,
93 return stream_buffer.GetHandle(); 46 std::size_t size, u8* data) {
47 glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
48 static_cast<GLsizeiptr>(size), data);
94} 49}
95 50
96void OGLBufferCache::AlignBuffer(std::size_t alignment) { 51void OGLBufferCache::CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst,
97 // Align the offset, not the mapped pointer 52 std::size_t src_offset, std::size_t dst_offset,
98 const GLintptr offset_aligned = 53 std::size_t size) {
99 static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment)); 54 glCopyNamedBufferSubData(src.handle, dst.handle, static_cast<GLintptr>(src_offset),
100 buffer_ptr += offset_aligned - buffer_offset; 55 static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
101 buffer_offset = offset_aligned;
102} 56}
103 57
104} // namespace OpenGL 58} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index f2347581b..8c8ac4038 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -4,80 +4,44 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <cstddef>
8#include <memory> 7#include <memory>
9#include <tuple>
10 8
11#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/buffer_cache.h"
12#include "video_core/rasterizer_cache.h" 11#include "video_core/rasterizer_cache.h"
13#include "video_core/renderer_opengl/gl_resource_manager.h" 12#include "video_core/renderer_opengl/gl_resource_manager.h"
14#include "video_core/renderer_opengl/gl_stream_buffer.h" 13#include "video_core/renderer_opengl/gl_stream_buffer.h"
15 14
15namespace Core {
16class System;
17}
18
16namespace OpenGL { 19namespace OpenGL {
17 20
21class OGLStreamBuffer;
18class RasterizerOpenGL; 22class RasterizerOpenGL;
19 23
20class CachedBufferEntry final : public RasterizerCacheObject { 24class OGLBufferCache final : public VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer> {
21public:
22 explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
23 std::size_t alignment, u8* host_ptr);
24
25 VAddr GetCpuAddr() const override {
26 return cpu_addr;
27 }
28
29 std::size_t GetSizeInBytes() const override {
30 return size;
31 }
32
33 std::size_t GetSize() const {
34 return size;
35 }
36
37 GLintptr GetOffset() const {
38 return offset;
39 }
40
41 std::size_t GetAlignment() const {
42 return alignment;
43 }
44
45private:
46 VAddr cpu_addr{};
47 std::size_t size{};
48 GLintptr offset{};
49 std::size_t alignment{};
50};
51
52class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
53public: 25public:
54 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size); 26 explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
55 27 std::size_t stream_size);
56 /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been 28 ~OGLBufferCache();
57 /// allocated.
58 GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
59 bool cache = true);
60 29
61 /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. 30 const GLuint* GetEmptyBuffer(std::size_t) override;
62 GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4);
63
64 bool Map(std::size_t max_size);
65 void Unmap();
66
67 GLuint GetHandle() const;
68 31
69protected: 32protected:
70 void AlignBuffer(std::size_t alignment); 33 OGLBuffer CreateBuffer(std::size_t size) override;
34
35 const GLuint* ToHandle(const OGLBuffer& buffer) override;
71 36
72 // We do not have to flush this cache as things in it are never modified by us. 37 void UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size,
73 void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} 38 const u8* data) override;
74 39
75private: 40 void DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size,
76 OGLStreamBuffer stream_buffer; 41 u8* data) override;
77 42
78 u8* buffer_ptr = nullptr; 43 void CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, std::size_t src_offset,
79 GLintptr buffer_offset = 0; 44 std::size_t dst_offset, std::size_t size) override;
80 GLintptr buffer_offset_base = 0;
81}; 45};
82 46
83} // namespace OpenGL 47} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index a48e14d2e..85424a4c9 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -24,8 +24,10 @@ T GetInteger(GLenum pname) {
24 24
25Device::Device() { 25Device::Device() {
26 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 26 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
27 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
27 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); 28 max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
28 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); 29 max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
30 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
29 has_variable_aoffi = TestVariableAoffi(); 31 has_variable_aoffi = TestVariableAoffi();
30 has_component_indexing_bug = TestComponentIndexingBug(); 32 has_component_indexing_bug = TestComponentIndexingBug();
31} 33}
@@ -34,6 +36,7 @@ Device::Device(std::nullptr_t) {
34 uniform_buffer_alignment = 0; 36 uniform_buffer_alignment = 0;
35 max_vertex_attributes = 16; 37 max_vertex_attributes = 16;
36 max_varyings = 15; 38 max_varyings = 15;
39 has_vertex_viewport_layer = true;
37 has_variable_aoffi = true; 40 has_variable_aoffi = true;
38 has_component_indexing_bug = false; 41 has_component_indexing_bug = false;
39} 42}
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 8c8c93760..dc883722d 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -18,6 +18,10 @@ public:
18 return uniform_buffer_alignment; 18 return uniform_buffer_alignment;
19 } 19 }
20 20
21 std::size_t GetShaderStorageBufferAlignment() const {
22 return shader_storage_alignment;
23 }
24
21 u32 GetMaxVertexAttributes() const { 25 u32 GetMaxVertexAttributes() const {
22 return max_vertex_attributes; 26 return max_vertex_attributes;
23 } 27 }
@@ -26,6 +30,10 @@ public:
26 return max_varyings; 30 return max_varyings;
27 } 31 }
28 32
33 bool HasVertexViewportLayer() const {
34 return has_vertex_viewport_layer;
35 }
36
29 bool HasVariableAoffi() const { 37 bool HasVariableAoffi() const {
30 return has_variable_aoffi; 38 return has_variable_aoffi;
31 } 39 }
@@ -39,8 +47,10 @@ private:
39 static bool TestComponentIndexingBug(); 47 static bool TestComponentIndexingBug();
40 48
41 std::size_t uniform_buffer_alignment{}; 49 std::size_t uniform_buffer_alignment{};
50 std::size_t shader_storage_alignment{};
42 u32 max_vertex_attributes{}; 51 u32 max_vertex_attributes{};
43 u32 max_varyings{}; 52 u32 max_varyings{};
53 bool has_vertex_viewport_layer{};
44 bool has_variable_aoffi{}; 54 bool has_variable_aoffi{};
45 bool has_component_indexing_bug{}; 55 bool has_component_indexing_bug{};
46}; 56};
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
deleted file mode 100644
index d5e385151..000000000
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <glad/glad.h>
6
7#include "common/logging/log.h"
8#include "core/core.h"
9#include "video_core/memory_manager.h"
10#include "video_core/renderer_opengl/gl_global_cache.h"
11#include "video_core/renderer_opengl/gl_rasterizer.h"
12#include "video_core/renderer_opengl/gl_shader_decompiler.h"
13#include "video_core/renderer_opengl/utils.h"
14
15namespace OpenGL {
16
17CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size)
18 : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size},
19 max_size{max_size} {
20 buffer.Create();
21 LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
22}
23
24CachedGlobalRegion::~CachedGlobalRegion() = default;
25
26void CachedGlobalRegion::Reload(u32 size_) {
27 size = size_;
28 if (size > max_size) {
29 size = max_size;
30 LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_,
31 max_size);
32 }
33 glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW);
34}
35
36void CachedGlobalRegion::Flush() {
37 LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr);
38 glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr);
39}
40
41GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
42 const auto search{reserve.find(addr)};
43 if (search == reserve.end()) {
44 return {};
45 }
46 return search->second;
47}
48
49GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr,
50 u32 size) {
51 GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
52 if (!region) {
53 // No reserved surface available, create a new one and reserve it
54 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
55 const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)};
56 ASSERT(cpu_addr);
57
58 region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size);
59 ReserveGlobalRegion(region);
60 }
61 region->Reload(size);
62 return region;
63}
64
65void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
66 reserve.insert_or_assign(region->GetCacheAddr(), std::move(region));
67}
68
69GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
70 : RasterizerCache{rasterizer} {
71 GLint max_ssbo_size_;
72 glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_);
73 max_ssbo_size = static_cast<u32>(max_ssbo_size_);
74}
75
76GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
77 const GLShader::GlobalMemoryEntry& global_region,
78 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
79 std::lock_guard lock{mutex};
80
81 auto& gpu{Core::System::GetInstance().GPU()};
82 auto& memory_manager{gpu.MemoryManager()};
83 const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
84 const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
85 global_region.GetCbufOffset()};
86 const auto actual_addr{memory_manager.Read<u64>(addr)};
87 const auto size{memory_manager.Read<u32>(addr + 8)};
88
89 // Look up global region in the cache based on address
90 const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
91 GlobalRegion region{TryGet(host_ptr)};
92
93 if (!region) {
94 // No global region found - create a new one
95 region = GetUncachedGlobalRegion(actual_addr, host_ptr, size);
96 Register(region);
97 }
98
99 return region;
100}
101
102} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
deleted file mode 100644
index 2d467a240..000000000
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ /dev/null
@@ -1,82 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <unordered_map>
9
10#include <glad/glad.h>
11
12#include "common/assert.h"
13#include "common/common_types.h"
14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/rasterizer_cache.h"
16#include "video_core/renderer_opengl/gl_resource_manager.h"
17
18namespace OpenGL {
19
20namespace GLShader {
21class GlobalMemoryEntry;
22}
23
24class RasterizerOpenGL;
25class CachedGlobalRegion;
26using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
27
28class CachedGlobalRegion final : public RasterizerCacheObject {
29public:
30 explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size);
31 ~CachedGlobalRegion();
32
33 VAddr GetCpuAddr() const override {
34 return cpu_addr;
35 }
36
37 std::size_t GetSizeInBytes() const override {
38 return size;
39 }
40
41 /// Gets the GL program handle for the buffer
42 GLuint GetBufferHandle() const {
43 return buffer.handle;
44 }
45
46 /// Reloads the global region from guest memory
47 void Reload(u32 size_);
48
49 void Flush();
50
51private:
52 VAddr cpu_addr{};
53 u8* host_ptr{};
54 u32 size{};
55 u32 max_size{};
56
57 OGLBuffer buffer;
58};
59
60class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
61public:
62 explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer);
63
64 /// Gets the current specified shader stage program
65 GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor,
66 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
67
68protected:
69 void FlushObjectInner(const GlobalRegion& object) override {
70 object->Flush();
71 }
72
73private:
74 GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
75 GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size);
76 void ReserveGlobalRegion(GlobalRegion region);
77
78 std::unordered_map<CacheAddr, GlobalRegion> reserve;
79 u32 max_ssbo_size{};
80};
81
82} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f45a3c5ef..0bb5c068c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -20,6 +20,7 @@
20#include "core/hle/kernel/process.h" 20#include "core/hle/kernel/process.h"
21#include "core/settings.h" 21#include "core/settings.h"
22#include "video_core/engines/maxwell_3d.h" 22#include "video_core/engines/maxwell_3d.h"
23#include "video_core/memory_manager.h"
23#include "video_core/renderer_opengl/gl_rasterizer.h" 24#include "video_core/renderer_opengl/gl_rasterizer.h"
24#include "video_core/renderer_opengl/gl_shader_cache.h" 25#include "video_core/renderer_opengl/gl_shader_cache.h"
25#include "video_core/renderer_opengl/gl_shader_gen.h" 26#include "video_core/renderer_opengl/gl_shader_gen.h"
@@ -80,11 +81,25 @@ struct DrawParameters {
80 } 81 }
81}; 82};
82 83
84static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
85 const GLShader::ConstBufferEntry& entry) {
86 if (!entry.IsIndirect()) {
87 return entry.GetSize();
88 }
89
90 if (buffer.size > Maxwell::MaxConstBufferSize) {
91 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
92 Maxwell::MaxConstBufferSize);
93 return Maxwell::MaxConstBufferSize;
94 }
95
96 return buffer.size;
97}
98
83RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, 99RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
84 ScreenInfo& info) 100 ScreenInfo& info)
85 : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, 101 : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
86 global_cache{*this}, system{system}, screen_info{info}, 102 system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} {
87 buffer_cache(*this, STREAM_BUFFER_SIZE) {
88 OpenGLState::ApplyDefaultState(); 103 OpenGLState::ApplyDefaultState();
89 104
90 shader_program_manager = std::make_unique<GLShader::ProgramManager>(); 105 shader_program_manager = std::make_unique<GLShader::ProgramManager>();
@@ -129,8 +144,6 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
129 state.draw.vertex_array = vao; 144 state.draw.vertex_array = vao;
130 state.ApplyVertexArrayState(); 145 state.ApplyVertexArrayState();
131 146
132 glVertexArrayElementBuffer(vao, buffer_cache.GetHandle());
133
134 // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. 147 // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
135 // Enables the first 16 vertex attributes always, as we don't know which ones are actually 148 // Enables the first 16 vertex attributes always, as we don't know which ones are actually
136 // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 149 // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16
@@ -197,11 +210,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
197 210
198 ASSERT(end > start); 211 ASSERT(end > start);
199 const u64 size = end - start + 1; 212 const u64 size = end - start + 1;
200 const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size); 213 const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
201 214
202 // Bind the vertex array to the buffer at the current offset. 215 // Bind the vertex array to the buffer at the current offset.
203 glVertexArrayVertexBuffer(vao, index, buffer_cache.GetHandle(), vertex_buffer_offset, 216 vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset,
204 vertex_array.stride); 217 vertex_array.stride);
205 218
206 if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { 219 if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
207 // Enable vertex buffer instancing with the specified divisor. 220 // Enable vertex buffer instancing with the specified divisor.
@@ -215,7 +228,19 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
215 gpu.dirty_flags.vertex_array.reset(); 228 gpu.dirty_flags.vertex_array.reset();
216} 229}
217 230
218DrawParameters RasterizerOpenGL::SetupDraw() { 231GLintptr RasterizerOpenGL::SetupIndexBuffer() {
232 if (accelerate_draw != AccelDraw::Indexed) {
233 return 0;
234 }
235 MICROPROFILE_SCOPE(OpenGL_Index);
236 const auto& regs = system.GPU().Maxwell3D().regs;
237 const std::size_t size = CalculateIndexBufferSize();
238 const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
239 vertex_array_pushbuffer.SetIndexBuffer(buffer);
240 return offset;
241}
242
243DrawParameters RasterizerOpenGL::SetupDraw(GLintptr index_buffer_offset) {
219 const auto& gpu = system.GPU().Maxwell3D(); 244 const auto& gpu = system.GPU().Maxwell3D();
220 const auto& regs = gpu.regs; 245 const auto& regs = gpu.regs;
221 const bool is_indexed = accelerate_draw == AccelDraw::Indexed; 246 const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
@@ -227,11 +252,9 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
227 params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); 252 params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
228 253
229 if (is_indexed) { 254 if (is_indexed) {
230 MICROPROFILE_SCOPE(OpenGL_Index);
231 params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); 255 params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
232 params.count = regs.index_array.count; 256 params.count = regs.index_array.count;
233 params.index_buffer_offset = 257 params.index_buffer_offset = index_buffer_offset;
234 buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize());
235 params.base_vertex = static_cast<GLint>(regs.vb_element_base); 258 params.base_vertex = static_cast<GLint>(regs.vb_element_base);
236 } else { 259 } else {
237 params.count = regs.vertex_buffer.count; 260 params.count = regs.vertex_buffer.count;
@@ -247,10 +270,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
247 BaseBindings base_bindings; 270 BaseBindings base_bindings;
248 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 271 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
249 272
250 // Prepare packed bindings
251 bind_ubo_pushbuffer.Setup(base_bindings.cbuf);
252 bind_ssbo_pushbuffer.Setup(base_bindings.gmem);
253
254 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 273 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
255 const auto& shader_config = gpu.regs.shader_config[index]; 274 const auto& shader_config = gpu.regs.shader_config[index];
256 const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; 275 const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
@@ -271,12 +290,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
271 290
272 GLShader::MaxwellUniformData ubo{}; 291 GLShader::MaxwellUniformData ubo{};
273 ubo.SetFromRegs(gpu, stage); 292 ubo.SetFromRegs(gpu, stage);
274 const GLintptr offset = 293 const auto [buffer, offset] =
275 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); 294 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
276 295
277 // Bind the emulation info buffer 296 // Bind the emulation info buffer
278 bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, 297 bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo)));
279 static_cast<GLsizeiptr>(sizeof(ubo)));
280 298
281 Shader shader{shader_cache.GetStageProgram(program)}; 299 Shader shader{shader_cache.GetStageProgram(program)};
282 300
@@ -321,9 +339,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
321 base_bindings = next_bindings; 339 base_bindings = next_bindings;
322 } 340 }
323 341
324 bind_ubo_pushbuffer.Bind();
325 bind_ssbo_pushbuffer.Bind();
326
327 SyncClipEnabled(clip_distances); 342 SyncClipEnabled(clip_distances);
328 343
329 gpu.dirty_flags.shaders = false; 344 gpu.dirty_flags.shaders = false;
@@ -634,26 +649,46 @@ void RasterizerOpenGL::DrawArrays() {
634 Maxwell::MaxShaderStage; 649 Maxwell::MaxShaderStage;
635 650
636 // Add space for at least 18 constant buffers 651 // Add space for at least 18 constant buffers
637 buffer_size += 652 buffer_size += Maxwell::MaxConstBuffers *
638 Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); 653 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
639 654
640 const bool invalidate = buffer_cache.Map(buffer_size); 655 // Prepare the vertex array.
641 if (invalidate) { 656 buffer_cache.Map(buffer_size);
642 // As all cached buffers are invalidated, we need to recheck their state.
643 gpu.dirty_flags.vertex_array.set();
644 }
645 657
658 // Prepare vertex array format.
646 const GLuint vao = SetupVertexFormat(); 659 const GLuint vao = SetupVertexFormat();
660 vertex_array_pushbuffer.Setup(vao);
661
662 // Upload vertex and index data.
647 SetupVertexBuffer(vao); 663 SetupVertexBuffer(vao);
664 const GLintptr index_buffer_offset = SetupIndexBuffer();
648 665
649 DrawParameters params = SetupDraw(); 666 // Setup draw parameters. It will automatically choose what glDraw* method to use.
667 const DrawParameters params = SetupDraw(index_buffer_offset);
668
669 // Prepare packed bindings.
670 bind_ubo_pushbuffer.Setup(0);
671 bind_ssbo_pushbuffer.Setup(0);
672
673 // Setup shaders and their used resources.
650 texture_cache.GuardSamplers(true); 674 texture_cache.GuardSamplers(true);
651 SetupShaders(params.primitive_mode); 675 SetupShaders(params.primitive_mode);
652 texture_cache.GuardSamplers(false); 676 texture_cache.GuardSamplers(false);
653 677
654 ConfigureFramebuffers(state); 678 ConfigureFramebuffers(state);
655 679
656 buffer_cache.Unmap(); 680 // Signal the buffer cache that we are not going to upload more things.
681 const bool invalidate = buffer_cache.Unmap();
682
683 // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL.
684 vertex_array_pushbuffer.Bind();
685 bind_ubo_pushbuffer.Bind();
686 bind_ssbo_pushbuffer.Bind();
687
688 if (invalidate) {
689 // As all cached buffers are invalidated, we need to recheck their state.
690 gpu.dirty_flags.vertex_array.set();
691 }
657 692
658 shader_program_manager->ApplyTo(state); 693 shader_program_manager->ApplyTo(state);
659 state.Apply(); 694 state.Apply();
@@ -675,7 +710,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
675 return; 710 return;
676 } 711 }
677 texture_cache.FlushRegion(addr, size); 712 texture_cache.FlushRegion(addr, size);
678 global_cache.FlushRegion(addr, size); 713 buffer_cache.FlushRegion(addr, size);
679} 714}
680 715
681void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { 716void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
@@ -685,7 +720,6 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
685 } 720 }
686 texture_cache.InvalidateRegion(addr, size); 721 texture_cache.InvalidateRegion(addr, size);
687 shader_cache.InvalidateRegion(addr, size); 722 shader_cache.InvalidateRegion(addr, size);
688 global_cache.InvalidateRegion(addr, size);
689 buffer_cache.InvalidateRegion(addr, size); 723 buffer_cache.InvalidateRegion(addr, size);
690} 724}
691 725
@@ -696,6 +730,10 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
696 InvalidateRegion(addr, size); 730 InvalidateRegion(addr, size);
697} 731}
698 732
733void RasterizerOpenGL::TickFrame() {
734 buffer_cache.TickFrame();
735}
736
699bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 737bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
700 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 738 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
701 const Tegra::Engines::Fermi2D::Config& copy_config) { 739 const Tegra::Engines::Fermi2D::Config& copy_config) {
@@ -739,11 +777,9 @@ void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::Sh
739 MICROPROFILE_SCOPE(OpenGL_UBO); 777 MICROPROFILE_SCOPE(OpenGL_UBO);
740 const auto stage_index = static_cast<std::size_t>(stage); 778 const auto stage_index = static_cast<std::size_t>(stage);
741 const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; 779 const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index];
742 const auto& entries = shader->GetShaderEntries().const_buffers;
743 780
744 // Upload only the enabled buffers from the 16 constbuffers of each shader stage 781 // Upload only the enabled buffers from the 16 constbuffers of each shader stage
745 for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { 782 for (const auto& entry : shader->GetShaderEntries().const_buffers) {
746 const auto& entry = entries[bindpoint];
747 SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); 783 SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry);
748 } 784 }
749} 785}
@@ -752,46 +788,34 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
752 const GLShader::ConstBufferEntry& entry) { 788 const GLShader::ConstBufferEntry& entry) {
753 if (!buffer.enabled) { 789 if (!buffer.enabled) {
754 // Set values to zero to unbind buffers 790 // Set values to zero to unbind buffers
755 bind_ubo_pushbuffer.Push(0, 0, 0); 791 bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
756 return; 792 return;
757 } 793 }
758 794
759 std::size_t size;
760 if (entry.IsIndirect()) {
761 // Buffer is accessed indirectly, so upload the entire thing
762 size = buffer.size;
763
764 if (size > MaxConstbufferSize) {
765 LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
766 MaxConstbufferSize);
767 size = MaxConstbufferSize;
768 }
769 } else {
770 // Buffer is accessed directly, upload just what we use
771 size = entry.GetSize();
772 }
773
774 // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 795 // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
775 // UBO alignment requirements. 796 // UBO alignment requirements.
776 size = Common::AlignUp(size, sizeof(GLvec4)); 797 const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
777 ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big");
778 798
779 const std::size_t alignment = device.GetUniformBufferAlignment(); 799 const auto alignment = device.GetUniformBufferAlignment();
780 const GLintptr offset = buffer_cache.UploadMemory(buffer.address, size, alignment); 800 const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment);
781 bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, size); 801 bind_ubo_pushbuffer.Push(cbuf, offset, size);
782} 802}
783 803
784void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, 804void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
785 const Shader& shader) { 805 const Shader& shader) {
786 const auto& entries = shader->GetShaderEntries().global_memory_entries; 806 auto& gpu{system.GPU()};
787 for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { 807 auto& memory_manager{gpu.MemoryManager()};
788 const auto& entry{entries[bindpoint]}; 808 const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]};
789 const auto& region{global_cache.GetGlobalRegion(entry, stage)}; 809 const auto alignment{device.GetShaderStorageBufferAlignment()};
790 if (entry.IsWritten()) { 810
791 region->MarkAsModified(true, global_cache); 811 for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
792 } 812 const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
793 bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0, 813 const auto actual_addr{memory_manager.Read<u64>(addr)};
794 static_cast<GLsizeiptr>(region->GetSizeInBytes())); 814 const auto size{memory_manager.Read<u32>(addr + 8)};
815
816 const auto [ssbo, buffer_offset] =
817 buffer_cache.UploadMemory(actual_addr, size, alignment, true, entry.IsWritten());
818 bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
795 } 819 }
796} 820}
797 821
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d238c1257..40b571d58 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -24,7 +24,6 @@
24#include "video_core/renderer_opengl/gl_buffer_cache.h" 24#include "video_core/renderer_opengl/gl_buffer_cache.h"
25#include "video_core/renderer_opengl/gl_device.h" 25#include "video_core/renderer_opengl/gl_device.h"
26#include "video_core/renderer_opengl/gl_framebuffer_cache.h" 26#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
27#include "video_core/renderer_opengl/gl_global_cache.h"
28#include "video_core/renderer_opengl/gl_resource_manager.h" 27#include "video_core/renderer_opengl/gl_resource_manager.h"
29#include "video_core/renderer_opengl/gl_sampler_cache.h" 28#include "video_core/renderer_opengl/gl_sampler_cache.h"
30#include "video_core/renderer_opengl/gl_shader_cache.h" 29#include "video_core/renderer_opengl/gl_shader_cache.h"
@@ -63,6 +62,7 @@ public:
63 void FlushRegion(CacheAddr addr, u64 size) override; 62 void FlushRegion(CacheAddr addr, u64 size) override;
64 void InvalidateRegion(CacheAddr addr, u64 size) override; 63 void InvalidateRegion(CacheAddr addr, u64 size) override;
65 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 64 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
65 void TickFrame() override;
66 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 66 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
67 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 67 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
68 const Tegra::Engines::Fermi2D::Config& copy_config) override; 68 const Tegra::Engines::Fermi2D::Config& copy_config) override;
@@ -73,11 +73,6 @@ public:
73 void LoadDiskResources(const std::atomic_bool& stop_loading, 73 void LoadDiskResources(const std::atomic_bool& stop_loading,
74 const VideoCore::DiskResourceLoadCallback& callback) override; 74 const VideoCore::DiskResourceLoadCallback& callback) override;
75 75
76 /// Maximum supported size that a constbuffer can have in bytes.
77 static constexpr std::size_t MaxConstbufferSize = 0x10000;
78 static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
79 "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
80
81private: 76private:
82 struct FramebufferConfigState { 77 struct FramebufferConfigState {
83 bool using_color_fb{}; 78 bool using_color_fb{};
@@ -191,7 +186,6 @@ private:
191 186
192 TextureCacheOpenGL texture_cache; 187 TextureCacheOpenGL texture_cache;
193 ShaderCacheOpenGL shader_cache; 188 ShaderCacheOpenGL shader_cache;
194 GlobalRegionCacheOpenGL global_cache;
195 SamplerCacheOpenGL sampler_cache; 189 SamplerCacheOpenGL sampler_cache;
196 FramebufferCacheOpenGL framebuffer_cache; 190 FramebufferCacheOpenGL framebuffer_cache;
197 191
@@ -210,6 +204,7 @@ private:
210 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; 204 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
211 OGLBufferCache buffer_cache; 205 OGLBufferCache buffer_cache;
212 206
207 VertexArrayPushBuffer vertex_array_pushbuffer;
213 BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; 208 BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
214 BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; 209 BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
215 210
@@ -222,7 +217,9 @@ private:
222 217
223 void SetupVertexBuffer(GLuint vao); 218 void SetupVertexBuffer(GLuint vao);
224 219
225 DrawParameters SetupDraw(); 220 GLintptr SetupIndexBuffer();
221
222 DrawParameters SetupDraw(GLintptr index_buffer_offset);
226 223
227 void SetupShaders(GLenum primitive_mode); 224 void SetupShaders(GLenum primitive_mode);
228 225
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 5d76ee12d..32dd9eae7 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -190,8 +190,11 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
190 const auto texture_buffer_usage{variant.texture_buffer_usage}; 190 const auto texture_buffer_usage{variant.texture_buffer_usage};
191 191
192 std::string source = "#version 430 core\n" 192 std::string source = "#version 430 core\n"
193 "#extension GL_ARB_separate_shader_objects : enable\n\n"; 193 "#extension GL_ARB_separate_shader_objects : enable\n";
194 source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); 194 if (entries.shader_viewport_layer_array) {
195 source += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
196 }
197 source += fmt::format("\n#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
195 198
196 for (const auto& cbuf : entries.const_buffers) { 199 for (const auto& cbuf : entries.const_buffers) {
197 source += 200 source +=
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index bfc975a04..119073776 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -14,6 +14,7 @@
14#include "common/alignment.h" 14#include "common/alignment.h"
15#include "common/assert.h" 15#include "common/assert.h"
16#include "common/common_types.h" 16#include "common/common_types.h"
17#include "common/logging/log.h"
17#include "video_core/engines/maxwell_3d.h" 18#include "video_core/engines/maxwell_3d.h"
18#include "video_core/renderer_opengl/gl_device.h" 19#include "video_core/renderer_opengl/gl_device.h"
19#include "video_core/renderer_opengl/gl_rasterizer.h" 20#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -46,7 +47,7 @@ using TextureArgument = std::pair<Type, Node>;
46using TextureIR = std::variant<TextureAoffi, TextureArgument>; 47using TextureIR = std::variant<TextureAoffi, TextureArgument>;
47 48
48constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 49constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
49 static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); 50 static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
50 51
51class ShaderWriter { 52class ShaderWriter {
52public: 53public:
@@ -246,6 +247,8 @@ public:
246 usage.is_read, usage.is_written); 247 usage.is_read, usage.is_written);
247 } 248 }
248 entries.clip_distances = ir.GetClipDistances(); 249 entries.clip_distances = ir.GetClipDistances();
250 entries.shader_viewport_layer_array =
251 stage == ShaderStage::Vertex && (ir.UsesLayer() || ir.UsesViewportIndex());
249 entries.shader_length = ir.GetLength(); 252 entries.shader_length = ir.GetLength();
250 return entries; 253 return entries;
251 } 254 }
@@ -282,22 +285,35 @@ private:
282 } 285 }
283 286
284 void DeclareVertexRedeclarations() { 287 void DeclareVertexRedeclarations() {
285 bool clip_distances_declared = false;
286
287 code.AddLine("out gl_PerVertex {{"); 288 code.AddLine("out gl_PerVertex {{");
288 ++code.scope; 289 ++code.scope;
289 290
290 code.AddLine("vec4 gl_Position;"); 291 code.AddLine("vec4 gl_Position;");
291 292
292 for (const auto o : ir.GetOutputAttributes()) { 293 for (const auto attribute : ir.GetOutputAttributes()) {
293 if (o == Attribute::Index::PointSize) 294 if (attribute == Attribute::Index::ClipDistances0123 ||
294 code.AddLine("float gl_PointSize;"); 295 attribute == Attribute::Index::ClipDistances4567) {
295 if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 ||
296 o == Attribute::Index::ClipDistances4567)) {
297 code.AddLine("float gl_ClipDistance[];"); 296 code.AddLine("float gl_ClipDistance[];");
298 clip_distances_declared = true; 297 break;
299 } 298 }
300 } 299 }
300 if (stage != ShaderStage::Vertex || device.HasVertexViewportLayer()) {
301 if (ir.UsesLayer()) {
302 code.AddLine("int gl_Layer;");
303 }
304 if (ir.UsesViewportIndex()) {
305 code.AddLine("int gl_ViewportIndex;");
306 }
307 } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderStage::Vertex &&
308 !device.HasVertexViewportLayer()) {
309 LOG_ERROR(
310 Render_OpenGL,
311 "GL_ARB_shader_viewport_layer_array is not available and its required by a shader");
312 }
313
314 if (ir.UsesPointSize()) {
315 code.AddLine("float gl_PointSize;");
316 }
301 317
302 --code.scope; 318 --code.scope;
303 code.AddLine("}};"); 319 code.AddLine("}};");
@@ -805,6 +821,45 @@ private:
805 return CastOperand(VisitOperand(operation, operand_index), type); 821 return CastOperand(VisitOperand(operation, operand_index), type);
806 } 822 }
807 823
824 std::optional<std::pair<std::string, bool>> GetOutputAttribute(const AbufNode* abuf) {
825 switch (const auto attribute = abuf->GetIndex()) {
826 case Attribute::Index::Position:
827 return std::make_pair("gl_Position"s + GetSwizzle(abuf->GetElement()), false);
828 case Attribute::Index::LayerViewportPointSize:
829 switch (abuf->GetElement()) {
830 case 0:
831 UNIMPLEMENTED();
832 return {};
833 case 1:
834 if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) {
835 return {};
836 }
837 return std::make_pair("gl_Layer", true);
838 case 2:
839 if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) {
840 return {};
841 }
842 return std::make_pair("gl_ViewportIndex", true);
843 case 3:
844 UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader");
845 return std::make_pair("gl_PointSize", false);
846 }
847 return {};
848 case Attribute::Index::ClipDistances0123:
849 return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), false);
850 case Attribute::Index::ClipDistances4567:
851 return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4),
852 false);
853 default:
854 if (IsGenericAttribute(attribute)) {
855 return std::make_pair(
856 GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), false);
857 }
858 UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
859 return {};
860 }
861 }
862
808 std::string CastOperand(const std::string& value, Type type) const { 863 std::string CastOperand(const std::string& value, Type type) const {
809 switch (type) { 864 switch (type) {
810 case Type::Bool: 865 case Type::Bool:
@@ -1001,6 +1056,8 @@ private:
1001 const Node& src = operation[1]; 1056 const Node& src = operation[1];
1002 1057
1003 std::string target; 1058 std::string target;
1059 bool is_integer = false;
1060
1004 if (const auto gpr = std::get_if<GprNode>(&*dest)) { 1061 if (const auto gpr = std::get_if<GprNode>(&*dest)) {
1005 if (gpr->GetIndex() == Register::ZeroIndex) { 1062 if (gpr->GetIndex() == Register::ZeroIndex) {
1006 // Writing to Register::ZeroIndex is a no op 1063 // Writing to Register::ZeroIndex is a no op
@@ -1009,26 +1066,12 @@ private:
1009 target = GetRegister(gpr->GetIndex()); 1066 target = GetRegister(gpr->GetIndex());
1010 } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { 1067 } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
1011 UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); 1068 UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
1012 1069 const auto result = GetOutputAttribute(abuf);
1013 target = [&]() -> std::string { 1070 if (!result) {
1014 switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) { 1071 return {};
1015 case Attribute::Index::Position: 1072 }
1016 return "gl_Position"s + GetSwizzle(abuf->GetElement()); 1073 target = result->first;
1017 case Attribute::Index::PointSize: 1074 is_integer = result->second;
1018 return "gl_PointSize";
1019 case Attribute::Index::ClipDistances0123:
1020 return fmt::format("gl_ClipDistance[{}]", abuf->GetElement());
1021 case Attribute::Index::ClipDistances4567:
1022 return fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4);
1023 default:
1024 if (IsGenericAttribute(attribute)) {
1025 return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement());
1026 }
1027 UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
1028 static_cast<u32>(attribute));
1029 return "0";
1030 }
1031 }();
1032 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { 1075 } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
1033 target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); 1076 target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
1034 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { 1077 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
@@ -1040,7 +1083,11 @@ private:
1040 UNREACHABLE_MSG("Assign called without a proper target"); 1083 UNREACHABLE_MSG("Assign called without a proper target");
1041 } 1084 }
1042 1085
1043 code.AddLine("{} = {};", target, Visit(src)); 1086 if (is_integer) {
1087 code.AddLine("{} = ftoi({});", target, Visit(src));
1088 } else {
1089 code.AddLine("{} = {};", target, Visit(src));
1090 }
1044 return {}; 1091 return {};
1045 } 1092 }
1046 1093
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 14d11c7fc..02586736d 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -78,6 +78,7 @@ struct ShaderEntries {
78 std::vector<ImageEntry> images; 78 std::vector<ImageEntry> images;
79 std::vector<GlobalMemoryEntry> global_memory_entries; 79 std::vector<GlobalMemoryEntry> global_memory_entries;
80 std::array<bool, Maxwell::NumClipDistances> clip_distances{}; 80 std::array<bool, Maxwell::NumClipDistances> clip_distances{};
81 bool shader_viewport_layer_array{};
81 std::size_t shader_length{}; 82 std::size_t shader_length{};
82}; 83};
83 84
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 10688397b..7893d1e26 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -373,6 +373,12 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
373 } 373 }
374 } 374 }
375 375
376 bool shader_viewport_layer_array{};
377 if (!LoadObjectFromPrecompiled(shader_viewport_layer_array)) {
378 return {};
379 }
380 entry.entries.shader_viewport_layer_array = shader_viewport_layer_array;
381
376 u64 shader_length{}; 382 u64 shader_length{};
377 if (!LoadObjectFromPrecompiled(shader_length)) { 383 if (!LoadObjectFromPrecompiled(shader_length)) {
378 return {}; 384 return {};
@@ -445,6 +451,10 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
445 } 451 }
446 } 452 }
447 453
454 if (!SaveObjectToPrecompiled(entries.shader_viewport_layer_array)) {
455 return false;
456 }
457
448 if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { 458 if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) {
449 return false; 459 return false;
450 } 460 }
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index d86e137ac..0eae98afe 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -6,8 +6,11 @@
6#include <glad/glad.h> 6#include <glad/glad.h>
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "common/microprofile.h"
9#include "video_core/renderer_opengl/gl_state.h" 10#include "video_core/renderer_opengl/gl_state.h"
10 11
12MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128));
13
11namespace OpenGL { 14namespace OpenGL {
12 15
13using Maxwell = Tegra::Engines::Maxwell3D::Regs; 16using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -524,6 +527,7 @@ void OpenGLState::ApplySamplers() const {
524} 527}
525 528
526void OpenGLState::Apply() const { 529void OpenGLState::Apply() const {
530 MICROPROFILE_SCOPE(OpenGL_State);
527 ApplyFramebufferState(); 531 ApplyFramebufferState();
528 ApplyVertexArrayState(); 532 ApplyVertexArrayState();
529 ApplyShaderProgram(); 533 ApplyShaderProgram();
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 08ae1a429..b1f6bc7c2 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -31,6 +31,8 @@ using VideoCore::Surface::SurfaceType;
31 31
32MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); 32MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128));
33MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); 33MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128));
34MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
35 MP_RGB(128, 192, 128));
34 36
35namespace { 37namespace {
36 38
@@ -535,6 +537,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
535} 537}
536 538
537void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { 539void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
540 MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy);
538 const auto& src_params = src_surface->GetSurfaceParams(); 541 const auto& src_params = src_surface->GetSurfaceParams();
539 const auto& dst_params = dst_surface->GetSurfaceParams(); 542 const auto& dst_params = dst_surface->GetSurfaceParams();
540 UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); 543 UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index b142521ec..9ecdddb0d 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -101,7 +101,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst
101 101
102RendererOpenGL::~RendererOpenGL() = default; 102RendererOpenGL::~RendererOpenGL() = default;
103 103
104/// Swap buffers (render frame)
105void RendererOpenGL::SwapBuffers( 104void RendererOpenGL::SwapBuffers(
106 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { 105 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
107 106
@@ -130,6 +129,8 @@ void RendererOpenGL::SwapBuffers(
130 129
131 DrawScreen(render_window.GetFramebufferLayout()); 130 DrawScreen(render_window.GetFramebufferLayout());
132 131
132 rasterizer->TickFrame();
133
133 render_window.SwapBuffers(); 134 render_window.SwapBuffers();
134 } 135 }
135 136
@@ -262,7 +263,6 @@ void RendererOpenGL::CreateRasterizer() {
262 if (rasterizer) { 263 if (rasterizer) {
263 return; 264 return;
264 } 265 }
265 // Initialize sRGB Usage
266 OpenGLState::ClearsRGBUsed(); 266 OpenGLState::ClearsRGBUsed();
267 rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); 267 rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info);
268} 268}
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index 68c36988d..c504a2c1a 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -13,29 +13,67 @@
13 13
14namespace OpenGL { 14namespace OpenGL {
15 15
16VertexArrayPushBuffer::VertexArrayPushBuffer() = default;
17
18VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
19
20void VertexArrayPushBuffer::Setup(GLuint vao_) {
21 vao = vao_;
22 index_buffer = nullptr;
23 vertex_buffers.clear();
24}
25
26void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) {
27 index_buffer = buffer;
28}
29
30void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer,
31 GLintptr offset, GLsizei stride) {
32 vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride});
33}
34
35void VertexArrayPushBuffer::Bind() {
36 if (index_buffer) {
37 glVertexArrayElementBuffer(vao, *index_buffer);
38 }
39
40 // TODO(Rodrigo): Find a way to ARB_multi_bind this
41 for (const auto& entry : vertex_buffers) {
42 glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset,
43 entry.stride);
44 }
45}
46
16BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} 47BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
17 48
18BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; 49BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
19 50
20void BindBuffersRangePushBuffer::Setup(GLuint first_) { 51void BindBuffersRangePushBuffer::Setup(GLuint first_) {
21 first = first_; 52 first = first_;
22 buffers.clear(); 53 buffer_pointers.clear();
23 offsets.clear(); 54 offsets.clear();
24 sizes.clear(); 55 sizes.clear();
25} 56}
26 57
27void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) { 58void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) {
28 buffers.push_back(buffer); 59 buffer_pointers.push_back(buffer);
29 offsets.push_back(offset); 60 offsets.push_back(offset);
30 sizes.push_back(size); 61 sizes.push_back(size);
31} 62}
32 63
33void BindBuffersRangePushBuffer::Bind() const { 64void BindBuffersRangePushBuffer::Bind() {
34 const std::size_t count{buffers.size()}; 65 // Ensure sizes are valid.
66 const std::size_t count{buffer_pointers.size()};
35 DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); 67 DEBUG_ASSERT(count == offsets.size() && count == sizes.size());
36 if (count == 0) { 68 if (count == 0) {
37 return; 69 return;
38 } 70 }
71
72 // Dereference buffers.
73 buffers.resize(count);
74 std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(),
75 [](const GLuint* pointer) { return *pointer; });
76
39 glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), 77 glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
40 sizes.data()); 78 sizes.data());
41} 79}
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index 4a752f3b4..6c2b45546 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -11,20 +11,49 @@
11 11
12namespace OpenGL { 12namespace OpenGL {
13 13
14class BindBuffersRangePushBuffer { 14class VertexArrayPushBuffer final {
15public: 15public:
16 BindBuffersRangePushBuffer(GLenum target); 16 explicit VertexArrayPushBuffer();
17 ~VertexArrayPushBuffer();
18
19 void Setup(GLuint vao_);
20
21 void SetIndexBuffer(const GLuint* buffer);
22
23 void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset,
24 GLsizei stride);
25
26 void Bind();
27
28private:
29 struct Entry {
30 GLuint binding_index{};
31 const GLuint* buffer{};
32 GLintptr offset{};
33 GLsizei stride{};
34 };
35
36 GLuint vao{};
37 const GLuint* index_buffer{};
38 std::vector<Entry> vertex_buffers;
39};
40
41class BindBuffersRangePushBuffer final {
42public:
43 explicit BindBuffersRangePushBuffer(GLenum target);
17 ~BindBuffersRangePushBuffer(); 44 ~BindBuffersRangePushBuffer();
18 45
19 void Setup(GLuint first_); 46 void Setup(GLuint first_);
20 47
21 void Push(GLuint buffer, GLintptr offset, GLsizeiptr size); 48 void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size);
22 49
23 void Bind() const; 50 void Bind();
24 51
25private: 52private:
26 GLenum target; 53 GLenum target{};
27 GLuint first; 54 GLuint first{};
55 std::vector<const GLuint*> buffer_pointers;
56
28 std::vector<GLuint> buffers; 57 std::vector<GLuint> buffers;
29 std::vector<GLintptr> offsets; 58 std::vector<GLintptr> offsets;
30 std::vector<GLsizeiptr> sizes; 59 std::vector<GLsizeiptr> sizes;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 1bb04607b..9b2d8e987 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -430,20 +430,17 @@ private:
430 instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input, 430 instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input,
431 t_in_uint, "instance_index"); 431 t_in_uint, "instance_index");
432 432
433 bool is_point_size_declared = false;
434 bool is_clip_distances_declared = false; 433 bool is_clip_distances_declared = false;
435 for (const auto index : ir.GetOutputAttributes()) { 434 for (const auto index : ir.GetOutputAttributes()) {
436 if (index == Attribute::Index::PointSize) { 435 if (index == Attribute::Index::ClipDistances0123 ||
437 is_point_size_declared = true; 436 index == Attribute::Index::ClipDistances4567) {
438 } else if (index == Attribute::Index::ClipDistances0123 ||
439 index == Attribute::Index::ClipDistances4567) {
440 is_clip_distances_declared = true; 437 is_clip_distances_declared = true;
441 } 438 }
442 } 439 }
443 440
444 std::vector<Id> members; 441 std::vector<Id> members;
445 members.push_back(t_float4); 442 members.push_back(t_float4);
446 if (is_point_size_declared) { 443 if (ir.UsesPointSize()) {
447 members.push_back(t_float); 444 members.push_back(t_float);
448 } 445 }
449 if (is_clip_distances_declared) { 446 if (is_clip_distances_declared) {
@@ -466,7 +463,7 @@ private:
466 463
467 position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true); 464 position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true);
468 point_size_index = 465 point_size_index =
469 MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", is_point_size_declared); 466 MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", ir.UsesPointSize());
470 clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances", 467 clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances",
471 is_clip_distances_declared); 468 is_clip_distances_declared);
472 469
@@ -712,7 +709,8 @@ private:
712 case Attribute::Index::Position: 709 case Attribute::Index::Position:
713 return AccessElement(t_out_float, per_vertex, position_index, 710 return AccessElement(t_out_float, per_vertex, position_index,
714 abuf->GetElement()); 711 abuf->GetElement());
715 case Attribute::Index::PointSize: 712 case Attribute::Index::LayerViewportPointSize:
713 UNIMPLEMENTED_IF(abuf->GetElement() != 3);
716 return AccessElement(t_out_float, per_vertex, point_size_index); 714 return AccessElement(t_out_float, per_vertex, point_size_index);
717 case Attribute::Index::ClipDistances0123: 715 case Attribute::Index::ClipDistances0123:
718 return AccessElement(t_out_float, per_vertex, clip_distances_index, 716 return AccessElement(t_out_float, per_vertex, clip_distances_index,
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 24f022cc0..77151a24b 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -95,12 +95,8 @@ const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::Image
95const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, 95const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg,
96 Tegra::Shader::ImageType type) { 96 Tegra::Shader::ImageType type) {
97 const Node image_register{GetRegister(reg)}; 97 const Node image_register{GetRegister(reg)};
98 const Node base_image{ 98 const auto [base_image, cbuf_index, cbuf_offset]{
99 TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; 99 TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
100 const auto cbuf{std::get_if<CbufNode>(&*base_image)};
101 const auto cbuf_offset_imm{std::get_if<ImmediateNode>(&*cbuf->GetOffset())};
102 const auto cbuf_offset{cbuf_offset_imm->GetValue()};
103 const auto cbuf_index{cbuf->GetIndex()};
104 const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; 100 const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
105 101
106 // If this image has already been used, return the existing mapping. 102 // If this image has already been used, return the existing mapping.
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 80fc0ccfc..ab207a33b 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -297,18 +297,13 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeB
297 const auto addr_register{GetRegister(instr.gmem.gpr)}; 297 const auto addr_register{GetRegister(instr.gmem.gpr)};
298 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; 298 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
299 299
300 const Node base_address{ 300 const auto [base_address, index, offset] =
301 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; 301 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
302 const auto cbuf = std::get_if<CbufNode>(&*base_address); 302 ASSERT(base_address != nullptr);
303 ASSERT(cbuf != nullptr); 303
304 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset()); 304 bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
305 ASSERT(cbuf_offset_imm != nullptr); 305
306 const auto cbuf_offset = cbuf_offset_imm->GetValue(); 306 const GlobalMemoryBase descriptor{index, offset};
307
308 bb.push_back(
309 Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
310
311 const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
312 const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); 307 const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
313 auto& usage = entry->second; 308 auto& usage = entry->second;
314 if (is_write) { 309 if (is_write) {
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index cb480be9b..e1ee5c190 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -269,7 +269,13 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
269 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); 269 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
270 } 270 }
271 271
272 WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); 272 const Node4 components = GetTldsCode(instr, texture_type, is_array);
273
274 if (instr.tlds.fp32_flag) {
275 WriteTexsInstructionFloat(bb, instr, components);
276 } else {
277 WriteTexsInstructionHalfFloat(bb, instr, components);
278 }
273 break; 279 break;
274 } 280 }
275 default: 281 default:
@@ -302,13 +308,9 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
302const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, 308const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type,
303 bool is_array, bool is_shadow) { 309 bool is_array, bool is_shadow) {
304 const Node sampler_register = GetRegister(reg); 310 const Node sampler_register = GetRegister(reg);
305 const Node base_sampler = 311 const auto [base_sampler, cbuf_index, cbuf_offset] =
306 TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); 312 TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
307 const auto cbuf = std::get_if<CbufNode>(&*base_sampler); 313 ASSERT(base_sampler != nullptr);
308 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset());
309 ASSERT(cbuf_offset_imm != nullptr);
310 const auto cbuf_offset = cbuf_offset_imm->GetValue();
311 const auto cbuf_index = cbuf->GetIndex();
312 const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); 314 const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
313 315
314 // If this sampler has already been used, return the existing mapping. 316 // If this sampler has already been used, return the existing mapping.
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 5994bfc4e..78bd1cf1e 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -61,7 +61,16 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
61 const auto [entry, is_new] = used_cbufs.try_emplace(index); 61 const auto [entry, is_new] = used_cbufs.try_emplace(index);
62 entry->second.MarkAsUsedIndirect(); 62 entry->second.MarkAsUsedIndirect();
63 63
64 const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset)); 64 const Node final_offset = [&]() {
65 // Attempt to inline constant buffer without a variable offset. This is done to allow
66 // tracking LDC calls.
67 if (const auto gpr = std::get_if<GprNode>(&*node)) {
68 if (gpr->GetIndex() == Register::ZeroIndex) {
69 return Immediate(offset);
70 }
71 }
72 return Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset));
73 }();
65 return MakeNode<CbufNode>(index, final_offset); 74 return MakeNode<CbufNode>(index, final_offset);
66} 75}
67 76
@@ -89,6 +98,22 @@ Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_addres
89} 98}
90 99
91Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { 100Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
101 if (index == Attribute::Index::LayerViewportPointSize) {
102 switch (element) {
103 case 0:
104 UNIMPLEMENTED();
105 break;
106 case 1:
107 uses_layer = true;
108 break;
109 case 2:
110 uses_viewport_index = true;
111 break;
112 case 3:
113 uses_point_size = true;
114 break;
115 }
116 }
92 if (index == Attribute::Index::ClipDistances0123 || 117 if (index == Attribute::Index::ClipDistances0123 ||
93 index == Attribute::Index::ClipDistances4567) { 118 index == Attribute::Index::ClipDistances4567) {
94 const auto clip_index = 119 const auto clip_index =
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 6145f0a70..126c78136 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -115,6 +115,18 @@ public:
115 return static_cast<std::size_t>(coverage_end * sizeof(u64)); 115 return static_cast<std::size_t>(coverage_end * sizeof(u64));
116 } 116 }
117 117
118 bool UsesLayer() const {
119 return uses_layer;
120 }
121
122 bool UsesViewportIndex() const {
123 return uses_viewport_index;
124 }
125
126 bool UsesPointSize() const {
127 return uses_point_size;
128 }
129
118 bool HasPhysicalAttributes() const { 130 bool HasPhysicalAttributes() const {
119 return uses_physical_attributes; 131 return uses_physical_attributes;
120 } 132 }
@@ -316,7 +328,7 @@ private:
316 void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, 328 void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
317 Node op_c, Node imm_lut, bool sets_cc); 329 Node op_c, Node imm_lut, bool sets_cc);
318 330
319 Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; 331 std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
320 332
321 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; 333 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
322 334
@@ -346,6 +358,9 @@ private:
346 std::set<Image> used_images; 358 std::set<Image> used_images;
347 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; 359 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
348 std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; 360 std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
361 bool uses_layer{};
362 bool uses_viewport_index{};
363 bool uses_point_size{};
349 bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes 364 bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
350 365
351 Tegra::Shader::Header header; 366 Tegra::Shader::Header header;
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index fc957d980..dc132a4a3 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -32,39 +32,44 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
32 } 32 }
33 return {}; 33 return {};
34} 34}
35} // namespace 35} // Anonymous namespace
36 36
37Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { 37std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
38 s64 cursor) const {
38 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { 39 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
39 // Cbuf found, but it has to be immediate 40 // Constant buffer found, test if it's an immediate
40 return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; 41 const auto offset = cbuf->GetOffset();
42 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
43 return {tracked, cbuf->GetIndex(), immediate->GetValue()};
44 }
45 return {};
41 } 46 }
42 if (const auto gpr = std::get_if<GprNode>(&*tracked)) { 47 if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
43 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { 48 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
44 return nullptr; 49 return {};
45 } 50 }
46 // Reduce the cursor in one to avoid infinite loops when the instruction sets the same 51 // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
47 // register that it uses as operand 52 // register that it uses as operand
48 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); 53 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
49 if (!source) { 54 if (!source) {
50 return nullptr; 55 return {};
51 } 56 }
52 return TrackCbuf(source, code, new_cursor); 57 return TrackCbuf(source, code, new_cursor);
53 } 58 }
54 if (const auto operation = std::get_if<OperationNode>(&*tracked)) { 59 if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
55 for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { 60 for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) {
56 if (const auto found = TrackCbuf((*operation)[i], code, cursor)) { 61 if (auto found = TrackCbuf((*operation)[i], code, cursor); std::get<0>(found)) {
57 // Cbuf found in operand 62 // Cbuf found in operand.
58 return found; 63 return found;
59 } 64 }
60 } 65 }
61 return nullptr; 66 return {};
62 } 67 }
63 if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { 68 if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
64 const auto& conditional_code = conditional->GetCode(); 69 const auto& conditional_code = conditional->GetCode();
65 return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); 70 return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
66 } 71 }
67 return nullptr; 72 return {};
68} 73}
69 74
70std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { 75std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 7a0fdb19b..6af9044ca 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -75,9 +75,12 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs)
75 75
76 // Linear Surface check 76 // Linear Surface check
77 if (!params.is_tiled) { 77 if (!params.is_tiled) {
78 if (std::tie(params.width, params.height, params.pitch) == 78 if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) {
79 std::tie(rhs.width, rhs.height, rhs.pitch)) { 79 if (params.width == rhs.width) {
80 return MatchStructureResult::FullMatch; 80 return MatchStructureResult::FullMatch;
81 } else {
82 return MatchStructureResult::SemiMatch;
83 }
81 } 84 }
82 return MatchStructureResult::None; 85 return MatchStructureResult::None;
83 } 86 }
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 8ba386a8a..bcce8d863 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -200,8 +200,9 @@ public:
200 modification_tick = tick; 200 modification_tick = tick;
201 } 201 }
202 202
203 void MarkAsRenderTarget(const bool is_target) { 203 void MarkAsRenderTarget(const bool is_target, const u32 index) {
204 this->is_target = is_target; 204 this->is_target = is_target;
205 this->index = index;
205 } 206 }
206 207
207 void MarkAsPicked(const bool is_picked) { 208 void MarkAsPicked(const bool is_picked) {
@@ -221,6 +222,10 @@ public:
221 return is_target; 222 return is_target;
222 } 223 }
223 224
225 u32 GetRenderTarget() const {
226 return index;
227 }
228
224 bool IsRegistered() const { 229 bool IsRegistered() const {
225 return is_registered; 230 return is_registered;
226 } 231 }
@@ -307,10 +312,13 @@ private:
307 return view; 312 return view;
308 } 313 }
309 314
315 static constexpr u32 NO_RT = 0xFFFFFFFF;
316
310 bool is_modified{}; 317 bool is_modified{};
311 bool is_target{}; 318 bool is_target{};
312 bool is_registered{}; 319 bool is_registered{};
313 bool is_picked{}; 320 bool is_picked{};
321 u32 index{NO_RT};
314 u64 modification_tick{}; 322 u64 modification_tick{};
315}; 323};
316 324
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 9c56e2b4f..fd5472451 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -290,12 +290,19 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co
290 290
291std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, 291std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
292 bool uncompressed) const { 292 bool uncompressed) const {
293 const bool tiled{as_host_size ? false : is_tiled};
294 const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; 293 const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
295 const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; 294 const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
296 const u32 depth{is_layered ? 1U : GetMipDepth(level)}; 295 const u32 depth{is_layered ? 1U : GetMipDepth(level)};
297 return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth, 296 if (is_tiled) {
298 GetMipBlockHeight(level), GetMipBlockDepth(level)); 297 return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height,
298 depth, GetMipBlockHeight(level),
299 GetMipBlockDepth(level));
300 } else if (as_host_size || IsBuffer()) {
301 return GetBytesPerPixel() * width * height * depth;
302 } else {
303 // Linear Texture Case
304 return pitch * height * depth;
305 }
299} 306}
300 307
301bool SurfaceParams::operator==(const SurfaceParams& rhs) const { 308bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c9e72531a..7f9623c62 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -133,11 +133,11 @@ public:
133 regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; 133 regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
134 auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); 134 auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true);
135 if (depth_buffer.target) 135 if (depth_buffer.target)
136 depth_buffer.target->MarkAsRenderTarget(false); 136 depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
137 depth_buffer.target = surface_view.first; 137 depth_buffer.target = surface_view.first;
138 depth_buffer.view = surface_view.second; 138 depth_buffer.view = surface_view.second;
139 if (depth_buffer.target) 139 if (depth_buffer.target)
140 depth_buffer.target->MarkAsRenderTarget(true); 140 depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
141 return surface_view.second; 141 return surface_view.second;
142 } 142 }
143 143
@@ -167,11 +167,11 @@ public:
167 auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), 167 auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
168 preserve_contents, true); 168 preserve_contents, true);
169 if (render_targets[index].target) 169 if (render_targets[index].target)
170 render_targets[index].target->MarkAsRenderTarget(false); 170 render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
171 render_targets[index].target = surface_view.first; 171 render_targets[index].target = surface_view.first;
172 render_targets[index].view = surface_view.second; 172 render_targets[index].view = surface_view.second;
173 if (render_targets[index].target) 173 if (render_targets[index].target)
174 render_targets[index].target->MarkAsRenderTarget(true); 174 render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
175 return surface_view.second; 175 return surface_view.second;
176 } 176 }
177 177
@@ -191,7 +191,7 @@ public:
191 if (depth_buffer.target == nullptr) { 191 if (depth_buffer.target == nullptr) {
192 return; 192 return;
193 } 193 }
194 depth_buffer.target->MarkAsRenderTarget(false); 194 depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
195 depth_buffer.target = nullptr; 195 depth_buffer.target = nullptr;
196 depth_buffer.view = nullptr; 196 depth_buffer.view = nullptr;
197 } 197 }
@@ -200,7 +200,7 @@ public:
200 if (render_targets[index].target == nullptr) { 200 if (render_targets[index].target == nullptr) {
201 return; 201 return;
202 } 202 }
203 render_targets[index].target->MarkAsRenderTarget(false); 203 render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
204 render_targets[index].target = nullptr; 204 render_targets[index].target = nullptr;
205 render_targets[index].view = nullptr; 205 render_targets[index].view = nullptr;
206 } 206 }
@@ -270,6 +270,16 @@ protected:
270 // and reading it from a sepparate buffer. 270 // and reading it from a sepparate buffer.
271 virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; 271 virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
272 272
273 void ManageRenderTargetUnregister(TSurface& surface) {
274 auto& maxwell3d = system.GPU().Maxwell3D();
275 const u32 index = surface->GetRenderTarget();
276 if (index == DEPTH_RT) {
277 maxwell3d.dirty_flags.zeta_buffer = true;
278 } else {
279 maxwell3d.dirty_flags.color_buffer.set(index, true);
280 }
281 }
282
273 void Register(TSurface surface) { 283 void Register(TSurface surface) {
274 const GPUVAddr gpu_addr = surface->GetGpuAddr(); 284 const GPUVAddr gpu_addr = surface->GetGpuAddr();
275 const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); 285 const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
@@ -294,6 +304,9 @@ protected:
294 if (guard_render_targets && surface->IsProtected()) { 304 if (guard_render_targets && surface->IsProtected()) {
295 return; 305 return;
296 } 306 }
307 if (!guard_render_targets && surface->IsRenderTarget()) {
308 ManageRenderTargetUnregister(surface);
309 }
297 const GPUVAddr gpu_addr = surface->GetGpuAddr(); 310 const GPUVAddr gpu_addr = surface->GetGpuAddr();
298 const CacheAddr cache_ptr = surface->GetCacheAddr(); 311 const CacheAddr cache_ptr = surface->GetCacheAddr();
299 const std::size_t size = surface->GetSizeInBytes(); 312 const std::size_t size = surface->GetSizeInBytes();
@@ -649,15 +662,6 @@ private:
649 } 662 }
650 return {current_surface, *view}; 663 return {current_surface, *view};
651 } 664 }
652 // The next case is unsafe, so if we r in accurate GPU, just skip it
653 if (Settings::values.use_accurate_gpu_emulation) {
654 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
655 MatchTopologyResult::FullMatch);
656 }
657 // This is the case the texture is a part of the parent.
658 if (current_surface->MatchesSubTexture(params, gpu_addr)) {
659 return RebuildSurface(current_surface, params, is_render);
660 }
661 } else { 665 } else {
662 // If there are many overlaps, odds are they are subtextures of the candidate 666 // If there are many overlaps, odds are they are subtextures of the candidate
663 // surface. We try to construct a new surface based on the candidate parameters, 667 // surface. We try to construct a new surface based on the candidate parameters,
@@ -793,6 +797,9 @@ private:
793 static constexpr u64 registry_page_size{1 << registry_page_bits}; 797 static constexpr u64 registry_page_size{1 << registry_page_bits};
794 std::unordered_map<CacheAddr, std::vector<TSurface>> registry; 798 std::unordered_map<CacheAddr, std::vector<TSurface>> registry;
795 799
800 static constexpr u32 DEPTH_RT = 8;
801 static constexpr u32 NO_RT = 0xFFFFFFFF;
802
796 // The L1 Cache is used for fast texture lookup before checking the overlaps 803 // The L1 Cache is used for fast texture lookup before checking the overlaps
797 // This avoids calculating size and other stuffs. 804 // This avoids calculating size and other stuffs.
798 std::unordered_map<CacheAddr, TSurface> l1_cache; 805 std::unordered_map<CacheAddr, TSurface> l1_cache;