74 files changed, 1856 insertions, 586 deletions
diff --git a/.ci/scripts/.gitkeep b/.ci/scripts/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
--- a/.ci/scripts/.gitkeep
+++ /dev/null
diff --git a/.ci/scripts/common/post-upload.sh b/.ci/scripts/common/post-upload.sh
new file mode 100644
index 000000000..bb4e9d328
--- /dev/null
+++ b/.ci/scripts/common/post-upload.sh
@@ -0,0 +1,15 @@
+#!/bin/bash -ex
+
+# Copy documentation
+cp license.txt "$REV_NAME"
+cp README.md "$REV_NAME"
+
+tar $COMPRESSION_FLAGS "$ARCHIVE_NAME" "$REV_NAME"
+
+mv "$REV_NAME" $RELEASE_NAME
+
+7z a "$REV_NAME.7z" $RELEASE_NAME
+
+# move the compiled archive into the artifacts directory to be uploaded by travis releases
+mv "$ARCHIVE_NAME" artifacts/
+mv "$REV_NAME.7z" artifacts/
diff --git a/.ci/scripts/common/pre-upload.sh b/.ci/scripts/common/pre-upload.sh
new file mode 100644
index 000000000..3c2fc79a2
--- /dev/null
+++ b/.ci/scripts/common/pre-upload.sh
@@ -0,0 +1,6 @@
+#!/bin/bash -ex
+
+GITDATE="`git show -s --date=short --format='%ad' | sed 's/-//g'`"
+GITREV="`git show -s --format='%h'`"
+
+mkdir -p artifacts
diff --git a/.ci/scripts/format/docker.sh b/.ci/scripts/format/docker.sh
new file mode 100644
index 000000000..778411e4a
--- /dev/null
+++ b/.ci/scripts/format/docker.sh
@@ -0,0 +1,6 @@
+#!/bin/bash -ex
+
+# Run clang-format
+cd /yuzu
+chmod a+x ./.ci/scripts/format/script.sh
+./.ci/scripts/format/script.sh
diff --git a/.ci/scripts/format/exec.sh b/.ci/scripts/format/exec.sh
new file mode 100644
index 000000000..5d6393b38
--- /dev/null
+++ b/.ci/scripts/format/exec.sh
@@ -0,0 +1,4 @@
+#!/bin/bash -ex
+
+chmod a+x ./.ci/scripts/format/docker.sh
+docker run -v $(pwd):/yuzu yuzuemu/build-environments:linux-clang-format /bin/bash -ex /yuzu/.ci/scripts/format/docker.sh
diff --git a/.ci/scripts/format/script.sh b/.ci/scripts/format/script.sh
new file mode 100644
index 000000000..5ab828d5e
--- /dev/null
+++ b/.ci/scripts/format/script.sh
@@ -0,0 +1,37 @@
+#!/bin/bash -ex
+
+if grep -nrI '\s$' src *.yml *.txt *.md Doxyfile .gitignore .gitmodules .ci* dist/*.desktop \
+        dist/*.svg dist/*.xml; then
+    echo Trailing whitespace found, aborting
+    exit 1
+fi
+
+# Default clang-format points to default 3.5 version one
+CLANG_FORMAT=clang-format-6.0
+$CLANG_FORMAT --version
+
+if [ "$TRAVIS_EVENT_TYPE" = "pull_request" ]; then
+    # Get list of every file modified in this pull request
+    files_to_lint="$(git diff --name-only --diff-filter=ACMRTUXB $TRAVIS_COMMIT_RANGE | grep '^src/[^.]*[.]\(cpp\|h\)$' || true)"
+else
+    # Check everything for branch pushes
+    files_to_lint="$(find src/ -name '*.cpp' -or -name '*.h')"
+fi
+
+# Turn off tracing for this because it's too verbose
+set +x
+
+for f in $files_to_lint; do
+    d=$(diff -u "$f" <($CLANG_FORMAT "$f") || true)
+    if ! [ -z "$d" ]; then
+        echo "!!! $f not compliant to coding style, here is the fix:"
+        echo "$d"
+        fail=1
+    fi
+done
+
+set -x
+
+if [ "$fail" = 1 ]; then
+    exit 1
+fi
diff --git a/.ci/scripts/linux/docker.sh b/.ci/scripts/linux/docker.sh
new file mode 100644
index 000000000..f538a4081
--- /dev/null
+++ b/.ci/scripts/linux/docker.sh
@@ -0,0 +1,14 @@
+#!/bin/bash -ex
+
+cd /yuzu
+
+ccache -s
+
+mkdir build || true && cd build
+cmake .. -G Ninja -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON
+
+ninja
+
+ccache -s
+
+ctest -VV -C Release
diff --git a/.ci/scripts/linux/exec.sh b/.ci/scripts/linux/exec.sh
new file mode 100644
index 000000000..a5a6c34b9
--- /dev/null
+++ b/.ci/scripts/linux/exec.sh
@@ -0,0 +1,5 @@
+#!/bin/bash -ex
+
+mkdir -p "ccache" || true
+chmod a+x ./.ci/scripts/linux/docker.sh
+docker run -e ENABLE_COMPATIBILITY_REPORTING -e CCACHE_DIR=/yuzu/ccache -v $(pwd):/yuzu yuzuemu/build-environments:linux-fresh /bin/bash /yuzu/.ci/scripts/linux/docker.sh
diff --git a/.ci/scripts/linux/upload.sh b/.ci/scripts/linux/upload.sh
new file mode 100644
index 000000000..0d131d1dd
--- /dev/null
+++ b/.ci/scripts/linux/upload.sh
@@ -0,0 +1,14 @@
+#!/bin/bash -ex
+
+. .ci/scripts/common/pre-upload.sh
+
+REV_NAME="yuzu-linux-${GITDATE}-${GITREV}"
+ARCHIVE_NAME="${REV_NAME}.tar.xz"
+COMPRESSION_FLAGS="-cJvf"
+
+mkdir "$REV_NAME"
+
+cp build/bin/yuzu-cmd "$REV_NAME"
+cp build/bin/yuzu "$REV_NAME"
+
+. .ci/scripts/common/post-upload.sh
diff --git a/.ci/scripts/merge/apply-patches-by-label.py b/.ci/scripts/merge/apply-patches-by-label.py
new file mode 100644
index 000000000..b346001a5
--- /dev/null
+++ b/.ci/scripts/merge/apply-patches-by-label.py
@@ -0,0 +1,28 @@
+# Download all pull requests as patches that match a specific label
+# Usage: python download-patches-by-label.py <Label to Match> <Root Path Folder to DL to>
+
+import requests, sys, json, urllib3.request, shutil, subprocess
+
+http = urllib3.PoolManager()
+dl_list = {}
+
+def check_individual(labels):
+    for label in labels:
+        if (label["name"] == sys.argv[1]):
+            return True
+    return False
+
+try:
+    url = 'https://api.github.com/repos/yuzu-emu/yuzu/pulls'
+    response = requests.get(url)
+    if (response.ok):
+        j = json.loads(response.content)
+        for pr in j:
+            if (check_individual(pr["labels"])):
+                pn = pr["number"]
+                print("Matched PR# %s" % pn)
+                print(subprocess.check_output(["git", "fetch", "https://github.com/yuzu-emu/yuzu.git", "pull/%s/head:pr-%s" % (pn, pn), "-f"]))
+                print(subprocess.check_output(["git", "merge", "--squash", "pr-%s" % pn]))
+                print(subprocess.check_output(["git", "commit", "-m\"Merge PR %s\"" % pn]))
+except:
+    sys.exit(-1)
diff --git a/.ci/scripts/merge/check-label-presence.py b/.ci/scripts/merge/check-label-presence.py
new file mode 100644
index 000000000..048466d7e
--- /dev/null
+++ b/.ci/scripts/merge/check-label-presence.py
@@ -0,0 +1,18 @@
+# Checks to see if the specified pull request # has the specified tag
+# Usage: python check-label-presence.py <Pull Request ID> <Name of Label>
+
+import requests, json, sys
+
+try:
+    url = 'https://api.github.com/repos/yuzu-emu/yuzu/issues/%s' % sys.argv[1]
+    response = requests.get(url)
+    if (response.ok):
+        j = json.loads(response.content)
+        for label in j["labels"]:
+            if label["name"] == sys.argv[2]:
+                print('##vso[task.setvariable variable=enabletesting;]true')
+                sys.exit()
+except:
+    sys.exit(-1)
+
+print('##vso[task.setvariable variable=enabletesting;]false')
diff --git a/.ci/scripts/merge/yuzubot-git-config.sh b/.ci/scripts/merge/yuzubot-git-config.sh
new file mode 100644
index 000000000..d9d595bbc
--- /dev/null
+++ b/.ci/scripts/merge/yuzubot-git-config.sh
@@ -0,0 +1,2 @@
+git config --global user.email "yuzu@yuzu-emu.org"
+git config --global user.name "yuzubot"
\ No newline at end of file
diff --git a/.ci/scripts/windows/docker.sh b/.ci/scripts/windows/docker.sh
new file mode 100644
index 000000000..f7093363b
--- /dev/null
+++ b/.ci/scripts/windows/docker.sh
@@ -0,0 +1,50 @@
+#!/bin/bash -ex
+
+cd /yuzu
+
+ccache -s
+
+# Dirty hack to trick unicorn makefile into believing we are in a MINGW system
+mv /bin/uname /bin/uname1 && echo -e '#!/bin/sh\necho MINGW64' >> /bin/uname
+chmod +x /bin/uname
+
+# Dirty hack to trick unicorn makefile into believing we have cmd
+echo '' >> /bin/cmd
+chmod +x /bin/cmd
+
+mkdir build || true && cd build
+cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release
+ninja
+
+# Clean up the dirty hacks
+rm /bin/uname && mv /bin/uname1 /bin/uname
+rm /bin/cmd
+
+ccache -s
+
+echo "Tests skipped"
+#ctest -VV -C Release
+
+echo 'Prepare binaries...'
+cd ..
+mkdir package
+
+QT_PLATFORM_DLL_PATH='/usr/x86_64-w64-mingw32/lib/qt5/plugins/platforms/'
+find build/ -name "yuzu*.exe" -exec cp {} 'package' \;
+
+# copy Qt plugins
+mkdir package/platforms
+cp "${QT_PLATFORM_DLL_PATH}/qwindows.dll" package/platforms/
+cp -rv "${QT_PLATFORM_DLL_PATH}/../mediaservice/" package/
+cp -rv "${QT_PLATFORM_DLL_PATH}/../imageformats/" package/
+rm -f package/mediaservice/*d.dll
+
+for i in package/*.exe; do
+    # we need to process pdb here, however, cv2pdb
+    # does not work here, so we just simply strip all the debug symbols
+    x86_64-w64-mingw32-strip "${i}"
+done
+
+pip3 install pefile
+python3 .ci/scripts/windows/scan_dll.py package/*.exe "package/"
+python3 .ci/scripts/windows/scan_dll.py package/imageformats/*.dll "package/"
diff --git a/.ci/scripts/windows/exec.sh b/.ci/scripts/windows/exec.sh
new file mode 100644
index 000000000..d6a994856
--- /dev/null
+++ b/.ci/scripts/windows/exec.sh
@@ -0,0 +1,5 @@
+#!/bin/bash -ex
+
+mkdir -p "ccache" || true
+chmod a+x ./.ci/scripts/windows/docker.sh
+docker run -e CCACHE_DIR=/yuzu/ccache -v $(pwd):/yuzu yuzuemu/build-environments:linux-mingw /bin/bash -ex /yuzu/.ci/scripts/windows/docker.sh
diff --git a/.ci/scripts/windows/scan_dll.py b/.ci/scripts/windows/scan_dll.py
new file mode 100644
index 000000000..163183f2e
--- /dev/null
+++ b/.ci/scripts/windows/scan_dll.py
@@ -0,0 +1,106 @@
+import pefile
+import sys
+import re
+import os
+import queue
+import shutil
+
+# constant definitions
+KNOWN_SYS_DLLS = ['WINMM.DLL', 'MSVCRT.DLL', 'VERSION.DLL', 'MPR.DLL',
+                  'DWMAPI.DLL', 'UXTHEME.DLL', 'DNSAPI.DLL', 'IPHLPAPI.DLL']
+# below is for Ubuntu 18.04 with specified PPA enabled, if you are using
+# other distro or different repositories, change the following accordingly
+DLL_PATH = [
+    '/usr/x86_64-w64-mingw32/bin/',
+    '/usr/x86_64-w64-mingw32/lib/',
+    '/usr/lib/gcc/x86_64-w64-mingw32/7.3-posix/'
+]
+
+missing = []
+
+
+def parse_imports(file_name):
+    results = []
+    pe = pefile.PE(file_name, fast_load=True)
+    pe.parse_data_directories()
+
+    for entry in pe.DIRECTORY_ENTRY_IMPORT:
+        current = entry.dll.decode()
+        current_u = current.upper()  # b/c Windows is often case insensitive
+        # here we filter out system dlls
+        # dll w/ names like *32.dll are likely to be system dlls
+        if current_u.upper() not in KNOWN_SYS_DLLS and not re.match(string=current_u, pattern=r'.*32\.DLL'):
+            results.append(current)
+
+    return results
+
+
+def parse_imports_recursive(file_name, path_list=[]):
+    q = queue.Queue()  # create a FIFO queue
+    # file_name can be a string or a list for the convience
+    if isinstance(file_name, str):
+        q.put(file_name)
+    elif isinstance(file_name, list):
+        for i in file_name:
+            q.put(i)
+    full_list = []
+    while q.qsize():
+        current = q.get_nowait()
+        print('> %s' % current)
+        deps = parse_imports(current)
+        # if this dll does not have any import, ignore it
+        if not deps:
+            continue
+        for dep in deps:
+            # the dependency already included in the list, skip
+            if dep in full_list:
+                continue
+            # find the requested dll in the provided paths
+            full_path = find_dll(dep)
+            if not full_path:
+                missing.append(dep)
+                continue
+            full_list.append(dep)
+            q.put(full_path)
+            path_list.append(full_path)
+    return full_list
+
+
+def find_dll(name):
+    for path in DLL_PATH:
+        for root, _, files in os.walk(path):
+            for f in files:
+                if name.lower() == f.lower():
+                    return os.path.join(root, f)
+
+
+def deploy(name, dst, dry_run=False):
+    dlls_path = []
+    parse_imports_recursive(name, dlls_path)
+    for dll_entry in dlls_path:
+        if not dry_run:
+            shutil.copy(dll_entry, dst)
+        else:
+            print('[Dry-Run] Copy %s to %s' % (dll_entry, dst))
+    print('Deploy completed.')
+    return dlls_path
+
+
+def main():
+    if len(sys.argv) < 3:
+        print('Usage: %s [files to examine ...] [target deploy directory]')
+        return 1
+    to_deploy = sys.argv[1:-1]
+    tgt_dir = sys.argv[-1]
+    if not os.path.isdir(tgt_dir):
+        print('%s is not a directory.' % tgt_dir)
+        return 1
+    print('Scanning dependencies...')
+    deploy(to_deploy, tgt_dir)
+    if missing:
+        print('Following DLLs are not found: %s' % ('\n'.join(missing)))
+    return 0
+
+
+if __name__ == '__main__':
+    main()
diff --git a/.ci/scripts/windows/upload.sh b/.ci/scripts/windows/upload.sh
new file mode 100644
index 000000000..de73d3541
--- /dev/null
+++ b/.ci/scripts/windows/upload.sh
@@ -0,0 +1,13 @@
+#!/bin/bash -ex
+
+. .ci/scripts/common/pre-upload.sh
+
+REV_NAME="yuzu-windows-mingw-${GITDATE}-${GITREV}"
+ARCHIVE_NAME="${REV_NAME}.tar.gz"
+COMPRESSION_FLAGS="-czvf"
+
+mkdir "$REV_NAME"
+# get around the permission issues
+cp -r package/* "$REV_NAME"
+
+. .ci/scripts/common/post-upload.sh
diff --git a/.ci/templates/build-single.yml b/.ci/templates/build-single.yml
new file mode 100644
index 000000000..77eeb96b5
--- /dev/null
+++ b/.ci/templates/build-single.yml
@@ -0,0 +1,21 @@
+parameters:
+  artifactSource: 'true'
+
+steps:
+- task: DockerInstaller@0
+  displayName: 'Prepare Environment'
+  inputs:
+    dockerVersion: '17.09.0-ce'
+- task: CacheBeta@0
+  displayName: 'Cache Build System'
+  inputs:
+    key: yuzu-v1-$(BuildName)-$(BuildSuffix)-$(CacheSuffix)
+    path: $(System.DefaultWorkingDirectory)/ccache
+    cacheHitVar: CACHE_RESTORED
+- script: chmod a+x ./.ci/scripts/$(ScriptFolder)/exec.sh && ./.ci/scripts/$(ScriptFolder)/exec.sh
+  displayName: 'Build'
+- script: chmod a+x ./.ci/scripts/$(ScriptFolder)/upload.sh && ./.ci/scripts/$(ScriptFolder)/upload.sh
+  displayName: 'Package Artifacts'
+- publish: artifacts
+  artifact: 'yuzu-$(BuildName)-$(BuildSuffix)'
+  displayName: 'Upload Artifacts'
diff --git a/.ci/templates/build-standard.yml b/.ci/templates/build-standard.yml
new file mode 100644
index 000000000..9975f5c49
--- /dev/null
+++ b/.ci/templates/build-standard.yml
@@ -0,0 +1,22 @@
+jobs:
+- job: build
+  displayName: 'standard'
+  pool:
+    vmImage: ubuntu-latest
+  strategy:
+    maxParallel: 10
+    matrix:
+      windows:
+        BuildSuffix: 'windows-mingw'
+        ScriptFolder: 'windows'
+      linux:
+        BuildSuffix: 'linux'
+        ScriptFolder: 'linux'
+  steps:
+  - template: ./sync-source.yml
+    parameters:
+      artifactSource: $(parameters.artifactSource)
+      needSubmodules: 'true'
+  - template: ./build-single.yml
+    parameters:
+      artifactSource: 'false'
\ No newline at end of file
diff --git a/.ci/templates/build-testing.yml b/.ci/templates/build-testing.yml
new file mode 100644
index 000000000..101e52996
--- /dev/null
+++ b/.ci/templates/build-testing.yml
@@ -0,0 +1,30 @@
+jobs:
+- job: build_test
+  displayName: 'testing'
+  pool:
+    vmImage: ubuntu-latest
+  strategy:
+    maxParallel: 10
+    matrix:
+      windows:
+        BuildSuffix: 'windows-testing'
+        ScriptFolder: 'windows'
+  steps:
+  - task: PythonScript@0
+    condition: eq(variables['Build.Reason'], 'PullRequest')
+    displayName: 'Determine Testing Status'
+    inputs:
+      scriptSource: 'filePath'
+      scriptPath: '../scripts/merge/check-label-presence.py'
+      arguments: '$(System.PullRequest.PullRequestNumber) create-testing-build'
+  - ${{ if eq(variables.enabletesting, 'true') }}:
+    - template: ./sync-source.yml
+      parameters:
+        artifactSource: $(parameters.artifactSource)
+        needSubmodules: 'true'
+    - template: ./mergebot.yml
+      parameters:
+        matchLabel: 'testing-merge'
+    - template: ./build-single.yml
+      parameters:
+        artifactSource: 'false'
\ No newline at end of file
diff --git a/.ci/templates/format-check.yml b/.ci/templates/format-check.yml
new file mode 100644
index 000000000..5061f1cb8
--- /dev/null
+++ b/.ci/templates/format-check.yml
@@ -0,0 +1,14 @@
+parameters:
+  artifactSource: 'true'
+
+steps:
+- template: ./sync-source.yml
+  parameters:
+    artifactSource: $(parameters.artifactSource)
+    needSubmodules: 'false'
+- task: DockerInstaller@0
+  displayName: 'Prepare Environment'
+  inputs:
+    dockerVersion: '17.09.0-ce'
+- script: chmod a+x ./.ci/scripts/format/exec.sh && ./.ci/scripts/format/exec.sh
+  displayName: 'Verify Formatting'
diff --git a/.ci/templates/merge.yml b/.ci/templates/merge.yml
new file mode 100644
index 000000000..efc82778a
--- /dev/null
+++ b/.ci/templates/merge.yml
@@ -0,0 +1,46 @@
+jobs:
+- job: merge
+  displayName: 'pull requests'
+  steps:
+  - checkout: self
+    submodules: recursive
+  - template: ./mergebot.yml
+    parameters:
+      matchLabel: '$(BuildName)-merge'
+  - task: ArchiveFiles@2
+    displayName: 'Package Source'
+    inputs:
+      rootFolderOrFile: '$(System.DefaultWorkingDirectory)'
+      includeRootFolder: false
+      archiveType: '7z'
+      archiveFile: '$(Build.ArtifactStagingDirectory)/yuzu-$(BuildName)-source.7z'
+  - task: PublishPipelineArtifact@1
+    displayName: 'Upload Artifacts'
+    inputs:
+      targetPath: '$(Build.ArtifactStagingDirectory)/yuzu-$(BuildName)-source.7z'
+      artifact: 'yuzu-$(BuildName)-source'
+      replaceExistingArchive: true
+- job: upload_source
+  displayName: 'upload'
+  dependsOn: merge
+  steps:
+  - template: ./sync-source.yml
+    parameters:
+      artifactSource: 'true'
+      needSubmodules: 'true'
+  - script: chmod a+x $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh && $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh
+    displayName: 'Apply Git Configuration'
+  - script: git tag -a $(BuildName)-$(Build.BuildId) -m "yuzu $(BuildName) $(Build.BuildNumber) $(Build.DefinitionName)"
+    displayName: 'Tag Source'
+  - script: git remote add other $(GitRepoPushChangesURL)
+    displayName: 'Register Repository'
+  - script: git push --follow-tags --force other HEAD:$(GitPushBranch)
+    displayName: 'Update Code'
+  - script: git rev-list -n 1 $(BuildName)-$(Build.BuildId) > $(Build.ArtifactStagingDirectory)/tag-commit.sha
+    displayName: 'Calculate Release Point'
+  - task: PublishPipelineArtifact@1
+    displayName: 'Upload Release Point'
+    inputs:
+      targetPath: '$(Build.ArtifactStagingDirectory)/tag-commit.sha'
+      artifact: 'yuzu-$(BuildName)-release-point'
+      replaceExistingArchive: true
\ No newline at end of file
diff --git a/.ci/templates/mergebot.yml b/.ci/templates/mergebot.yml
new file mode 100644
index 000000000..5211efcc6
--- /dev/null
+++ b/.ci/templates/mergebot.yml
@@ -0,0 +1,15 @@
+parameters:
+  matchLabel: 'dummy-merge'
+
+steps:
+- script: mkdir $(System.DefaultWorkingDirectory)/patches && pip install requests urllib3
+  displayName: 'Prepare Environment'
+- script: chmod a+x $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh && $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh
+  displayName: 'Apply Git Configuration'
+- task: PythonScript@0
+  displayName: 'Discover, Download, and Apply Patches'
+  inputs:
+    scriptSource: 'filePath'
+    scriptPath: '.ci/scripts/merge/apply-patches-by-label.py'
+    arguments: '${{ parameters.matchLabel }} patches'
+    workingDirectory: '$(System.DefaultWorkingDirectory)'
diff --git a/.ci/templates/release.yml b/.ci/templates/release.yml
new file mode 100644
index 000000000..60bebd2aa
--- /dev/null
+++ b/.ci/templates/release.yml
@@ -0,0 +1,29 @@
+steps:
+- task: DownloadPipelineArtifact@2
+  displayName: 'Download Windows Release'
+  inputs:
+    artifactName: 'yuzu-$(BuildName)-windows-mingw'
+    buildType: 'current'
+    targetPath: '$(Build.ArtifactStagingDirectory)'
+- task: DownloadPipelineArtifact@2
+  displayName: 'Download Linux Release'
+  inputs:
+    artifactName: 'yuzu-$(BuildName)-linux'
+    buildType: 'current'
+    targetPath: '$(Build.ArtifactStagingDirectory)'
+- task: DownloadPipelineArtifact@2
+  displayName: 'Download Release Point'
+  inputs:
+    artifactName: 'yuzu-$(BuildName)-release-point'
+    buildType: 'current'
+    targetPath: '$(Build.ArtifactStagingDirectory)'
+- script: echo '##vso[task.setvariable variable=tagcommit]' && cat $(Build.ArtifactStagingDirectory)/tag-commit.sha
+  displayName: 'Calculate Release Point'
+- task: GitHubRelease@0
+  inputs:
+    gitHubConnection: $(GitHubReleaseConnectionName)
+    repositoryName: '$(GitHubReleaseRepoName)'
+    action: 'create'
+    target: $(variables.tagcommit)
+    title: 'yuzu $(BuildName) #$(Build.BuildId)'
+    assets: '$(Build.ArtifactStagingDirectory)/*'
diff --git a/.ci/templates/retrieve-artifact-source.yml b/.ci/templates/retrieve-artifact-source.yml
new file mode 100644
index 000000000..47d217e7b
--- /dev/null
+++ b/.ci/templates/retrieve-artifact-source.yml
@@ -0,0 +1,16 @@
+steps:
+- checkout: none
+- task: DownloadPipelineArtifact@2
+  displayName: 'Download Source'
+  inputs:
+    artifactName: 'yuzu-$(BuildName)-source'
+    buildType: 'current'
+    targetPath: '$(Build.ArtifactStagingDirectory)'
+- script: rm -rf $(System.DefaultWorkingDirectory) && mkdir $(System.DefaultWorkingDirectory)
+  displayName: 'Clean Working Directory'
+- task: ExtractFiles@1
+  displayName: 'Prepare Source'
+  inputs:
+    archiveFilePatterns: '$(Build.ArtifactStagingDirectory)/*.7z'
+    destinationFolder: '$(System.DefaultWorkingDirectory)'
+    cleanDestinationFolder: false
\ No newline at end of file
diff --git a/.ci/templates/retrieve-master-source.yml b/.ci/templates/retrieve-master-source.yml
new file mode 100644
index 000000000..a08a3f926
--- /dev/null
+++ b/.ci/templates/retrieve-master-source.yml
@@ -0,0 +1,11 @@
+parameters:
+  needSubmodules: 'true'
+
+steps:
+- checkout: self
+  displayName: 'Checkout Recursive'
+  submodules: recursive
+#  condition: eq(parameters.needSubmodules, 'true')
+#- checkout: self
+#  displayName: 'Checkout Fast'
+#  condition: ne(parameters.needSubmodules, 'true')
diff --git a/.ci/templates/sync-source.yml b/.ci/templates/sync-source.yml
new file mode 100644
index 000000000..409e1cd83
--- /dev/null
+++ b/.ci/templates/sync-source.yml
@@ -0,0 +1,7 @@
+steps:
+- ${{ if eq(parameters.artifactSource, 'true') }}:
+  - template: ./retrieve-artifact-source.yml
+- ${{ if ne(parameters.artifactSource, 'true') }}:
+  - template: ./retrieve-master-source.yml
+    parameters:
+      needSubmodules: $(parameters.needSubmodules)
\ No newline at end of file
diff --git a/.ci/yuzu-mainline.yml b/.ci/yuzu-mainline.yml
index aa912913d..164bcb165 100644
--- a/.ci/yuzu-mainline.yml
+++ b/.ci/yuzu-mainline.yml
@@ -1,19 +1,23 @@
-# Starter pipeline
-# Start with a minimal pipeline that you can customize to build and deploy your code.
-# Add steps that build, run tests, deploy, and more:
-# https://aka.ms/yaml
-
 trigger:
 - master
 
-pool:
-  vmImage: 'ubuntu-latest'
-
-steps:
-- script: echo Hello, world!
-  displayName: 'Run a one-line script'
-
-- script: |
-    echo Add other tasks to build, test, and deploy your project.
-    echo See https://aka.ms/yaml
-  displayName: 'Run a multi-line script'
+stages:
+- stage: merge
+  displayName: 'merge'
+  jobs:
+  - template: ./templates/merge.yml
+- stage: format
+  dependsOn: merge
+  displayName: 'format'
+  jobs:
+  - job: format
+    displayName: 'clang'
+    pool:
+      vmImage: ubuntu-latest
+    steps:
+    - template: ./templates/format-check.yml
+- stage: build
+  displayName: 'build'
+  dependsOn: format
+  jobs:
+  - template: ./templates/build-standard.yml
diff --git a/.ci/yuzu-verify.yml b/.ci/yuzu-verify.yml
new file mode 100644
index 000000000..d01c1feed
--- /dev/null
+++ b/.ci/yuzu-verify.yml
@@ -0,0 +1,18 @@
+stages:
+- stage: format
+  displayName: 'format'
+  jobs:
+  - job: format
+    displayName: 'clang'
+    pool:
+      vmImage: ubuntu-latest
+    steps:
+    - template: ./templates/format-check.yml
+      parameters:
+        artifactSource: 'false'
+- stage: build
+  displayName: 'build'
+  dependsOn: format
+  jobs:
+  - template: ./templates/build-standard.yml
+  - template: ./templates/build-testing.yml
\ No newline at end of file
diff --git a/.ci/yuzu.yml b/.ci/yuzu.yml
deleted file mode 100644
index aa912913d..000000000
--- a/.ci/yuzu.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-# Starter pipeline
-# Start with a minimal pipeline that you can customize to build and deploy your code.
-# Add steps that build, run tests, deploy, and more:
-# https://aka.ms/yaml
-
-trigger:
-- master
-
-pool:
-  vmImage: 'ubuntu-latest'
-
-steps:
-- script: echo Hello, world!
-  displayName: 'Run a one-line script'
-
-- script: |
-    echo Add other tasks to build, test, and deploy your project.
-    echo See https://aka.ms/yaml
-  displayName: 'Run a multi-line script'
diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp
index eb76174c5..7310b3602 100644
--- a/src/core/file_sys/program_metadata.cpp
+++ b/src/core/file_sys/program_metadata.cpp
@@ -94,6 +94,10 @@ u64 ProgramMetadata::GetFilesystemPermissions() const {
     return aci_file_access.permissions;
 }
 
+u32 ProgramMetadata::GetSystemResourceSize() const {
+    return npdm_header.system_resource_size;
+}
+
 const ProgramMetadata::KernelCapabilityDescriptors& ProgramMetadata::GetKernelCapabilities() const {
     return aci_kernel_capabilities;
 }
diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h
index 43bf2820a..88ec97d85 100644
--- a/src/core/file_sys/program_metadata.h
+++ b/src/core/file_sys/program_metadata.h
@@ -58,6 +58,7 @@ public:
    u32 GetMainThreadStackSize() const;
    u64 GetTitleID() const;
    u64 GetFilesystemPermissions() const;
+    u32 GetSystemResourceSize() const;
    const KernelCapabilityDescriptors& GetKernelCapabilities() const;
 
    void Print() const;
@@ -76,7 +77,8 @@ private:
        u8 reserved_3;
        u8 main_thread_priority;
        u8 main_thread_cpu;
-        std::array<u8, 8> reserved_4;
+        std::array<u8, 4> reserved_4;
+        u32_le system_resource_size;
        u32_le process_category;
        u32_le main_stack_size;
        std::array<u8, 0x10> application_name;
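The header change above is pure bookkeeping: the old eight reserved bytes are split into four reserved bytes plus the new 32-bit field, so every later NPDM field keeps its offset. A minimal standalone sketch of that slice follows; the u8/u32_le aliases stand in for yuzu's common types, and padding differs from the real, densely packed header, so this only illustrates ordering and sizes.

```cpp
#include <array>
#include <cstdint>

using u8 = std::uint8_t;      // stand-in for yuzu's u8
using u32_le = std::uint32_t; // stand-in for yuzu's little-endian u32

// Slice of the NPDM header as changed above: 4 reserved bytes plus a 4-byte
// system_resource_size occupy exactly the 8 bytes that were previously reserved.
struct NpdmHeaderSlice {
    u8 main_thread_priority;
    u8 main_thread_cpu;
    std::array<u8, 4> reserved_4;  // was std::array<u8, 8>
    u32_le system_resource_size;   // exposed via ProgramMetadata::GetSystemResourceSize()
    u32_le process_category;
    u32_le main_stack_size;
};
```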
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index f45ef05f6..db3ab14ce 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -129,20 +129,17 @@ u64 Process::GetTotalPhysicalMemoryAvailable() const {
     return vm_manager.GetTotalPhysicalMemoryAvailable();
 }
 
-u64 Process::GetTotalPhysicalMemoryAvailableWithoutMmHeap() const {
-    // TODO: Subtract the personal heap size from this when the
-    // personal heap is implemented.
-    return GetTotalPhysicalMemoryAvailable();
+u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const {
+    return GetTotalPhysicalMemoryAvailable() - GetSystemResourceSize();
 }
 
 u64 Process::GetTotalPhysicalMemoryUsed() const {
-    return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size;
+    return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size +
+           GetSystemResourceUsage();
 }
 
-u64 Process::GetTotalPhysicalMemoryUsedWithoutMmHeap() const {
-    // TODO: Subtract the personal heap size from this when the
-    // personal heap is implemented.
-    return GetTotalPhysicalMemoryUsed();
+u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const {
+    return GetTotalPhysicalMemoryUsed() - GetSystemResourceUsage();
 }
 
 void Process::RegisterThread(const Thread* thread) {
@@ -172,6 +169,7 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
     program_id = metadata.GetTitleID();
     ideal_core = metadata.GetMainThreadCore();
     is_64bit_process = metadata.Is64BitProgram();
+    system_resource_size = metadata.GetSystemResourceSize();
 
     vm_manager.Reset(metadata.GetAddressSpaceType());
 
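The accounting introduced here reduces to two subtractions. Below is a small sketch, not part of the change, of the invariants the new getters imply; it assumes a fully constructed Kernel::Process and uses yuzu's ASSERT macro and u64 alias.

```cpp
#include "common/assert.h"
#include "common/common_types.h"
#include "core/hle/kernel/process.h"

// Sketch only: the relationships guaranteed by the getters above.
void CheckSystemResourceAccounting(const Kernel::Process& process) {
    const u64 size = process.GetSystemResourceSize();   // reserved up front, from the NPDM
    const u64 usage = process.GetSystemResourceUsage(); // currently always reported as 0

    // The "WithoutSystemResource" variants subtract the reservation/usage back out.
    ASSERT(process.GetTotalPhysicalMemoryAvailableWithoutSystemResource() ==
           process.GetTotalPhysicalMemoryAvailable() - size);
    ASSERT(process.GetTotalPhysicalMemoryUsedWithoutSystemResource() ==
           process.GetTotalPhysicalMemoryUsed() - usage);
}
```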
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 83ea02bee..3196014da 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -168,8 +168,24 @@ public:
         return capabilities.GetPriorityMask();
     }
 
-    u32 IsVirtualMemoryEnabled() const {
-        return is_virtual_address_memory_enabled;
+    /// Gets the amount of secure memory to allocate for memory management.
+    u32 GetSystemResourceSize() const {
+        return system_resource_size;
+    }
+
+    /// Gets the amount of secure memory currently in use for memory management.
+    u32 GetSystemResourceUsage() const {
+        // On hardware, this returns the amount of system resource memory that has
+        // been used by the kernel. This is problematic for Yuzu to emulate, because
+        // system resource memory is used for page tables -- and yuzu doesn't really
+        // have a way to calculate how much memory is required for page tables for
+        // the current process at any given time.
+        // TODO: Is this even worth implementing? Games may retrieve this value via
+        // an SDK function that gets used + available system resource size for debug
+        // or diagnostic purposes. However, it seems unlikely that a game would make
+        // decisions based on how much system memory is dedicated to its page tables.
+        // Is returning a value other than zero wise?
+        return 0;
     }
 
     /// Whether this process is an AArch64 or AArch32 process.
@@ -196,15 +212,15 @@ public:
     u64 GetTotalPhysicalMemoryAvailable() const;
 
     /// Retrieves the total physical memory available to this process in bytes,
-    /// without the size of the personal heap added to it.
-    u64 GetTotalPhysicalMemoryAvailableWithoutMmHeap() const;
+    /// without the size of the personal system resource heap added to it.
+    u64 GetTotalPhysicalMemoryAvailableWithoutSystemResource() const;
 
     /// Retrieves the total physical memory used by this process in bytes.
     u64 GetTotalPhysicalMemoryUsed() const;
 
     /// Retrieves the total physical memory used by this process in bytes,
-    /// without the size of the personal heap added to it.
-    u64 GetTotalPhysicalMemoryUsedWithoutMmHeap() const;
+    /// without the size of the personal system resource heap added to it.
+    u64 GetTotalPhysicalMemoryUsedWithoutSystemResource() const;
 
     /// Gets the list of all threads created with this process as their owner.
     const std::list<const Thread*>& GetThreadList() const {
@@ -298,12 +314,16 @@ private:
     /// Title ID corresponding to the process
     u64 program_id = 0;
 
+    /// Specifies additional memory to be reserved for the process's memory management by the
+    /// system. When this is non-zero, secure memory is allocated and used for page table allocation
+    /// instead of using the normal global page tables/memory block management.
+    u32 system_resource_size = 0;
+
     /// Resource limit descriptor for this process
     SharedPtr<ResourceLimit> resource_limit;
 
     /// The ideal CPU core for this process, threads are scheduled on this core by default.
     u8 ideal_core = 0;
-    u32 is_virtual_address_memory_enabled = 0;
 
     /// The Thread Local Storage area is allocated as processes create threads,
     /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 58374f829..a46eed3da 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -736,16 +736,16 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
         StackRegionBaseAddr = 14,
         StackRegionSize = 15,
         // 3.0.0+
-        IsVirtualAddressMemoryEnabled = 16,
-        PersonalMmHeapUsage = 17,
+        SystemResourceSize = 16,
+        SystemResourceUsage = 17,
         TitleId = 18,
         // 4.0.0+
         PrivilegedProcessId = 19,
         // 5.0.0+
         UserExceptionContextAddr = 20,
         // 6.0.0+
-        TotalPhysicalMemoryAvailableWithoutMmHeap = 21,
-        TotalPhysicalMemoryUsedWithoutMmHeap = 22,
+        TotalPhysicalMemoryAvailableWithoutSystemResource = 21,
+        TotalPhysicalMemoryUsedWithoutSystemResource = 22,
     };
 
     const auto info_id_type = static_cast<GetInfoType>(info_id);
@@ -763,12 +763,12 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
     case GetInfoType::StackRegionSize:
     case GetInfoType::TotalPhysicalMemoryAvailable:
     case GetInfoType::TotalPhysicalMemoryUsed:
-    case GetInfoType::IsVirtualAddressMemoryEnabled:
-    case GetInfoType::PersonalMmHeapUsage:
+    case GetInfoType::SystemResourceSize:
+    case GetInfoType::SystemResourceUsage:
     case GetInfoType::TitleId:
     case GetInfoType::UserExceptionContextAddr:
-    case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap:
-    case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap: {
+    case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
+    case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource: {
         if (info_sub_id != 0) {
             return ERR_INVALID_ENUM_VALUE;
         }
@@ -829,8 +829,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
         *result = process->GetTotalPhysicalMemoryUsed();
         return RESULT_SUCCESS;
 
-    case GetInfoType::IsVirtualAddressMemoryEnabled:
-        *result = process->IsVirtualMemoryEnabled();
+    case GetInfoType::SystemResourceSize:
+        *result = process->GetSystemResourceSize();
+        return RESULT_SUCCESS;
+
+    case GetInfoType::SystemResourceUsage:
+        LOG_WARNING(Kernel_SVC, "(STUBBED) Attempted to query system resource usage");
+        *result = process->GetSystemResourceUsage();
         return RESULT_SUCCESS;
 
     case GetInfoType::TitleId:
@@ -843,12 +848,12 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
         *result = 0;
         return RESULT_SUCCESS;
 
-    case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap:
-        *result = process->GetTotalPhysicalMemoryAvailable();
+    case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource:
+        *result = process->GetTotalPhysicalMemoryAvailableWithoutSystemResource();
         return RESULT_SUCCESS;
 
-    case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap:
-        *result = process->GetTotalPhysicalMemoryUsedWithoutMmHeap();
+    case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource:
+        *result = process->GetTotalPhysicalMemoryUsedWithoutSystemResource();
         return RESULT_SUCCESS;
 
     default:
@@ -953,6 +958,86 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha
     }
 }
 
+/// Maps memory at a desired address
+static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
+    LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
+
+    if (!Common::Is4KBAligned(addr)) {
+        LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (!Common::Is4KBAligned(size)) {
+        LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
+        return ERR_INVALID_SIZE;
+    }
+
+    if (size == 0) {
+        LOG_ERROR(Kernel_SVC, "Size is zero");
+        return ERR_INVALID_SIZE;
+    }
+
+    if (!(addr < addr + size)) {
+        LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    Process* const current_process = system.Kernel().CurrentProcess();
+    auto& vm_manager = current_process->VMManager();
+
+    if (current_process->GetSystemResourceSize() == 0) {
+        LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
+        return ERR_INVALID_STATE;
+    }
+
+    if (!vm_manager.IsWithinMapRegion(addr, size)) {
+        LOG_ERROR(Kernel_SVC, "Range not within map region");
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return vm_manager.MapPhysicalMemory(addr, size);
+}
+
+/// Unmaps memory previously mapped via MapPhysicalMemory
+static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size) {
+    LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size);
+
+    if (!Common::Is4KBAligned(addr)) {
+        LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr);
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (!Common::Is4KBAligned(size)) {
+        LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size);
+        return ERR_INVALID_SIZE;
+    }
+
+    if (size == 0) {
+        LOG_ERROR(Kernel_SVC, "Size is zero");
+        return ERR_INVALID_SIZE;
+    }
+
+    if (!(addr < addr + size)) {
+        LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address");
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    Process* const current_process = system.Kernel().CurrentProcess();
+    auto& vm_manager = current_process->VMManager();
+
+    if (current_process->GetSystemResourceSize() == 0) {
+        LOG_ERROR(Kernel_SVC, "System Resource Size is zero");
+        return ERR_INVALID_STATE;
+    }
+
+    if (!vm_manager.IsWithinMapRegion(addr, size)) {
+        LOG_ERROR(Kernel_SVC, "Range not within map region");
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return vm_manager.UnmapPhysicalMemory(addr, size);
+}
+
 /// Sets the thread activity
 static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) {
     LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity);
@@ -2310,8 +2395,8 @@ static const FunctionDef SVC_Table[] = {
     {0x29, SvcWrap<GetInfo>, "GetInfo"},
     {0x2A, nullptr, "FlushEntireDataCache"},
     {0x2B, nullptr, "FlushDataCache"},
-    {0x2C, nullptr, "MapPhysicalMemory"},
-    {0x2D, nullptr, "UnmapPhysicalMemory"},
+    {0x2C, SvcWrap<MapPhysicalMemory>, "MapPhysicalMemory"},
+    {0x2D, SvcWrap<UnmapPhysicalMemory>, "UnmapPhysicalMemory"},
     {0x2E, nullptr, "GetFutureThreadInfo"},
     {0x2F, nullptr, "GetLastThreadInfo"},
     {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"},
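For orientation, this is roughly the guest-side sequence the new handlers service. It is a hypothetical sketch, not part of the change: the svcGetInfo/svcMapPhysicalMemory/svcUnmapPhysicalMemory wrapper names, the Result type, and the current-process pseudo-handle are assumed to come from the guest SDK; only the info id, the 4 KiB alignment rules, and the map-region requirement are taken from the code above.

```cpp
// Hypothetical guest-side sketch (assumes SDK-provided raw SVC wrappers).
constexpr u64 InfoId_SystemResourceSize = 16;           // GetInfoType::SystemResourceSize
constexpr u32 PseudoHandle_CurrentProcess = 0xFFFF8001; // current-process pseudo-handle

Result MapExtraHeap(void* target, u64 size) {
    u64 system_resource_size = 0;
    Result rc = svcGetInfo(&system_resource_size, InfoId_SystemResourceSize,
                           PseudoHandle_CurrentProcess, 0);
    if (rc != 0) {
        return rc;
    }

    // A title with no system resource reservation gets ERR_INVALID_STATE from the
    // kernel, so skip the call entirely in that case.
    if (system_resource_size == 0) {
        return rc;
    }

    // target and size must be 4 KiB aligned, must not overflow, and must lie inside
    // the process's map/alias region, or the checks above reject the request.
    rc = svcMapPhysicalMemory(target, size); // backs [target, target + size) with heap
    // ... use the memory, then later:
    // svcUnmapPhysicalMemory(target, size);
    return rc;
}
```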
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 865473c6f..c2d8d0dc3 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -32,6 +32,11 @@ void SvcWrap(Core::System& system) {
     FuncReturn(system, func(system, Param(system, 0)).raw);
 }
 
+template <ResultCode func(Core::System&, u64, u64)>
+void SvcWrap(Core::System& system) {
+    FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw);
+}
+
 template <ResultCode func(Core::System&, u32)>
 void SvcWrap(Core::System& system) {
     FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw);
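A brief note on why this overload is needed — sketch only, leaning on the declarations shown in the two files above rather than being a standalone program:

```cpp
// MapPhysicalMemory is declared as ResultCode(Core::System&, VAddr, u64), and VAddr is a
// u64 alias, so SvcWrap<MapPhysicalMemory> selects the (Core::System&, u64, u64)
// specialization added above. The wrapper forwards the guest's first two SVC argument
// registers -- Param(system, 0) and Param(system, 1) -- as addr and size, then writes
// the raw ResultCode back for the guest.
using SvcHandler = void (*)(Core::System&);
constexpr SvcHandler map_physical_memory_handler = SvcWrap<MapPhysicalMemory>;
```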
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 7bc925a5f..4f45fb03b 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -11,6 +11,8 @@
 #include "core/core.h"
 #include "core/file_sys/program_metadata.h"
 #include "core/hle/kernel/errors.h"
+#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/resource_limit.h"
 #include "core/hle/kernel/vm_manager.h"
 #include "core/memory.h"
 #include "core/memory_setup.h"
@@ -48,10 +50,14 @@ bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
         type != next.type) {
         return false;
     }
-    if (type == VMAType::AllocatedMemoryBlock &&
-        (backing_block != next.backing_block || offset + size != next.offset)) {
+    if ((attribute & MemoryAttribute::DeviceMapped) == MemoryAttribute::DeviceMapped) {
+        // TODO: Can device mapped memory be merged sanely?
+        // Not merging it may cause inaccuracies versus hardware when memory layout is queried.
         return false;
     }
+    if (type == VMAType::AllocatedMemoryBlock) {
+        return true;
+    }
     if (type == VMAType::BackingMemory && backing_memory + size != next.backing_memory) {
         return false;
     }
@@ -99,7 +105,7 @@ bool VMManager::IsValidHandle(VMAHandle handle) const {
 ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
                                                           std::shared_ptr<std::vector<u8>> block,
                                                           std::size_t offset, u64 size,
-                                                          MemoryState state) {
+                                                          MemoryState state, VMAPermission perm) {
     ASSERT(block != nullptr);
     ASSERT(offset + size <= block->size());
 
@@ -109,7 +115,7 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
     ASSERT(final_vma.size == size);
 
     final_vma.type = VMAType::AllocatedMemoryBlock;
-    final_vma.permissions = VMAPermission::ReadWrite;
+    final_vma.permissions = perm;
     final_vma.state = state;
     final_vma.backing_block = std::move(block);
     final_vma.offset = offset;
@@ -288,6 +294,166 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
     return MakeResult<VAddr>(heap_region_base);
 }
 
+ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) {
+    const auto end_addr = target + size;
+    const auto last_addr = end_addr - 1;
+    VAddr cur_addr = target;
+
+    ResultCode result = RESULT_SUCCESS;
+
+    // Check how much memory we've already mapped.
+    const auto mapped_size_result = SizeOfAllocatedVMAsInRange(target, size);
+    if (mapped_size_result.Failed()) {
+        return mapped_size_result.Code();
+    }
+
+    // If we've already mapped the desired amount, return early.
+    const std::size_t mapped_size = *mapped_size_result;
+    if (mapped_size == size) {
+        return RESULT_SUCCESS;
+    }
+
+    // Check that we can map the memory we want.
+    const auto res_limit = system.CurrentProcess()->GetResourceLimit();
+    const u64 physmem_remaining = res_limit->GetMaxResourceValue(ResourceType::PhysicalMemory) -
+                                  res_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory);
+    if (physmem_remaining < (size - mapped_size)) {
+        return ERR_RESOURCE_LIMIT_EXCEEDED;
+    }
+
+    // Keep track of the memory regions we unmap.
+    std::vector<std::pair<u64, u64>> mapped_regions;
+
+    // Iterate, trying to map memory.
+    {
+        cur_addr = target;
+
+        auto iter = FindVMA(target);
+        ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end");
+
+        while (true) {
+            const auto& vma = iter->second;
+            const auto vma_start = vma.base;
+            const auto vma_end = vma_start + vma.size;
+            const auto vma_last = vma_end - 1;
+
+            // Map the memory block
+            const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr);
+            if (vma.state == MemoryState::Unmapped) {
+                const auto map_res =
+                    MapMemoryBlock(cur_addr, std::make_shared<std::vector<u8>>(map_size, 0), 0,
+                                   map_size, MemoryState::Heap, VMAPermission::ReadWrite);
+                result = map_res.Code();
+                if (result.IsError()) {
+                    break;
+                }
+
+                mapped_regions.emplace_back(cur_addr, map_size);
+            }
+
+            // Break once we hit the end of the range.
+            if (last_addr <= vma_last) {
+                break;
+            }
+
+            // Advance to the next block.
+            cur_addr = vma_end;
+            iter = FindVMA(cur_addr);
+            ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end");
+        }
+    }
+
+    // If we failed, unmap memory.
+    if (result.IsError()) {
+        for (const auto [unmap_address, unmap_size] : mapped_regions) {
+            ASSERT_MSG(UnmapRange(unmap_address, unmap_size).IsSuccess(),
+                       "MapPhysicalMemory un-map on error");
+        }
+
+        return result;
+    }
+
+    // Update amount of mapped physical memory.
+    physical_memory_mapped += size - mapped_size;
+
+    return RESULT_SUCCESS;
+}
+
+ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) {
+    const auto end_addr = target + size;
+    const auto last_addr = end_addr - 1;
+    VAddr cur_addr = target;
+
+    ResultCode result = RESULT_SUCCESS;
+
+    // Check how much memory is currently mapped.
+    const auto mapped_size_result = SizeOfUnmappablePhysicalMemoryInRange(target, size);
+    if (mapped_size_result.Failed()) {
+        return mapped_size_result.Code();
+    }
+
+    // If we've already unmapped all the memory, return early.
+    const std::size_t mapped_size = *mapped_size_result;
+    if (mapped_size == 0) {
+        return RESULT_SUCCESS;
+    }
+
+    // Keep track of the memory regions we unmap.
+    std::vector<std::pair<u64, u64>> unmapped_regions;
+
+    // Try to unmap regions.
+    {
+        cur_addr = target;
+
+        auto iter = FindVMA(target);
+        ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end");
+
+        while (true) {
+            const auto& vma = iter->second;
+            const auto vma_start = vma.base;
+            const auto vma_end = vma_start + vma.size;
+            const auto vma_last = vma_end - 1;
+
+            // Unmap the memory block
+            const auto unmap_size = std::min(end_addr - cur_addr, vma_end - cur_addr);
+            if (vma.state == MemoryState::Heap) {
+                result = UnmapRange(cur_addr, unmap_size);
+                if (result.IsError()) {
+                    break;
+                }
+
+                unmapped_regions.emplace_back(cur_addr, unmap_size);
+            }
+
+            // Break once we hit the end of the range.
+            if (last_addr <= vma_last) {
+                break;
+            }
+
+            // Advance to the next block.
+            cur_addr = vma_end;
+            iter = FindVMA(cur_addr);
+            ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end");
+        }
+    }
+
+    // If we failed, re-map regions.
+    // TODO: Preserve memory contents?
+    if (result.IsError()) {
+        for (const auto [map_address, map_size] : unmapped_regions) {
+            const auto remap_res =
+                MapMemoryBlock(map_address, std::make_shared<std::vector<u8>>(map_size, 0), 0,
+                               map_size, MemoryState::Heap, VMAPermission::None);
+            ASSERT_MSG(remap_res.Succeeded(), "UnmapPhysicalMemory re-map on error");
+        }
+    }
+
+    // Update mapped amount
+    physical_memory_mapped -= mapped_size;
+
+    return RESULT_SUCCESS;
+}
+
 ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) {
     constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped;
     const auto src_check_result = CheckRangeState(
@@ -435,7 +601,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem
| 435 | // Protect mirror with permissions from old region | 601 | // Protect mirror with permissions from old region |
| 436 | Reprotect(new_vma, vma->second.permissions); | 602 | Reprotect(new_vma, vma->second.permissions); |
| 437 | // Remove permissions from old region | 603 | // Remove permissions from old region |
| 438 | Reprotect(vma, VMAPermission::None); | 604 | ReprotectRange(src_addr, size, VMAPermission::None); |
| 439 | 605 | ||
| 440 | return RESULT_SUCCESS; | 606 | return RESULT_SUCCESS; |
| 441 | } | 607 | } |
| @@ -568,14 +734,14 @@ VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) { | |||
| 568 | VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) { | 734 | VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) { |
| 569 | const VMAIter next_vma = std::next(iter); | 735 | const VMAIter next_vma = std::next(iter); |
| 570 | if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) { | 736 | if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) { |
| 571 | iter->second.size += next_vma->second.size; | 737 | MergeAdjacentVMA(iter->second, next_vma->second); |
| 572 | vma_map.erase(next_vma); | 738 | vma_map.erase(next_vma); |
| 573 | } | 739 | } |
| 574 | 740 | ||
| 575 | if (iter != vma_map.begin()) { | 741 | if (iter != vma_map.begin()) { |
| 576 | VMAIter prev_vma = std::prev(iter); | 742 | VMAIter prev_vma = std::prev(iter); |
| 577 | if (prev_vma->second.CanBeMergedWith(iter->second)) { | 743 | if (prev_vma->second.CanBeMergedWith(iter->second)) { |
| 578 | prev_vma->second.size += iter->second.size; | 744 | MergeAdjacentVMA(prev_vma->second, iter->second); |
| 579 | vma_map.erase(iter); | 745 | vma_map.erase(iter); |
| 580 | iter = prev_vma; | 746 | iter = prev_vma; |
| 581 | } | 747 | } |
| @@ -584,6 +750,38 @@ VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) { | |||
| 584 | return iter; | 750 | return iter; |
| 585 | } | 751 | } |
| 586 | 752 | ||
| 753 | void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right) { | ||
| 754 | ASSERT(left.CanBeMergedWith(right)); | ||
| 755 | |||
| 756 | // Always merge allocated memory blocks, even when they don't share the same backing block. | ||
| 757 | if (left.type == VMAType::AllocatedMemoryBlock && | ||
| 758 | (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) { | ||
| 759 | // Check if we can save work. | ||
| 760 | if (left.offset == 0 && left.size == left.backing_block->size()) { | ||
| 761 | // Fast case: left is an entire backing block. | ||
| 762 | left.backing_block->insert(left.backing_block->end(), | ||
| 763 | right.backing_block->begin() + right.offset, | ||
| 764 | right.backing_block->begin() + right.offset + right.size); | ||
| 765 | } else { | ||
| 766 | // Slow case: make a new memory block for left and right. | ||
| 767 | auto new_memory = std::make_shared<std::vector<u8>>(); | ||
| 768 | new_memory->insert(new_memory->end(), left.backing_block->begin() + left.offset, | ||
| 769 | left.backing_block->begin() + left.offset + left.size); | ||
| 770 | new_memory->insert(new_memory->end(), right.backing_block->begin() + right.offset, | ||
| 771 | right.backing_block->begin() + right.offset + right.size); | ||
| 772 | left.backing_block = new_memory; | ||
| 773 | left.offset = 0; | ||
| 774 | } | ||
| 775 | |||
| 776 | // Page table update is needed, because backing memory changed. | ||
| 777 | left.size += right.size; | ||
| 778 | UpdatePageTableForVMA(left); | ||
| 779 | } else { | ||
| 780 | // Just update the size. | ||
| 781 | left.size += right.size; | ||
| 782 | } | ||
| 783 | } | ||
| 784 | |||
| 587 | void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { | 785 | void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { |
| 588 | switch (vma.type) { | 786 | switch (vma.type) { |
| 589 | case VMAType::Free: | 787 | case VMAType::Free: |
| @@ -758,6 +956,84 @@ VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, Memo | |||
| 758 | std::make_tuple(initial_state, initial_permissions, initial_attributes & ~ignore_mask)); | 956 | std::make_tuple(initial_state, initial_permissions, initial_attributes & ~ignore_mask)); |
| 759 | } | 957 | } |
| 760 | 958 | ||
| 959 | ResultVal<std::size_t> VMManager::SizeOfAllocatedVMAsInRange(VAddr address, | ||
| 960 | std::size_t size) const { | ||
| 961 | const VAddr end_addr = address + size; | ||
| 962 | const VAddr last_addr = end_addr - 1; | ||
| 963 | std::size_t mapped_size = 0; | ||
| 964 | |||
| 965 | VAddr cur_addr = address; | ||
| 966 | auto iter = FindVMA(cur_addr); | ||
| 967 | ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end"); | ||
| 968 | |||
| 969 | while (true) { | ||
| 970 | const auto& vma = iter->second; | ||
| 971 | const VAddr vma_start = vma.base; | ||
| 972 | const VAddr vma_end = vma_start + vma.size; | ||
| 973 | const VAddr vma_last = vma_end - 1; | ||
| 974 | |||
| 975 | // Add size if relevant. | ||
| 976 | if (vma.state != MemoryState::Unmapped) { | ||
| 977 | mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr); | ||
| 978 | } | ||
| 979 | |||
| 980 | // Break once we hit the end of the range. | ||
| 981 | if (last_addr <= vma_last) { | ||
| 982 | break; | ||
| 983 | } | ||
| 984 | |||
| 985 | // Advance to the next block. | ||
| 986 | cur_addr = vma_end; | ||
| 987 | iter = std::next(iter); | ||
| 988 | ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end"); | ||
| 989 | } | ||
| 990 | |||
| 991 | return MakeResult(mapped_size); | ||
| 992 | } | ||
| 993 | |||
| 994 | ResultVal<std::size_t> VMManager::SizeOfUnmappablePhysicalMemoryInRange(VAddr address, | ||
| 995 | std::size_t size) const { | ||
| 996 | const VAddr end_addr = address + size; | ||
| 997 | const VAddr last_addr = end_addr - 1; | ||
| 998 | std::size_t mapped_size = 0; | ||
| 999 | |||
| 1000 | VAddr cur_addr = address; | ||
| 1001 | auto iter = FindVMA(cur_addr); | ||
| 1002 | ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end"); | ||
| 1003 | |||
| 1004 | while (true) { | ||
| 1005 | const auto& vma = iter->second; | ||
| 1006 | const auto vma_start = vma.base; | ||
| 1007 | const auto vma_end = vma_start + vma.size; | ||
| 1008 | const auto vma_last = vma_end - 1; | ||
| 1009 | const auto state = vma.state; | ||
| 1010 | const auto attr = vma.attribute; | ||
| 1011 | |||
| 1012 | // Memory within region must be free or mapped heap. | ||
| 1013 | if (!((state == MemoryState::Heap && attr == MemoryAttribute::None) || | ||
| 1014 | (state == MemoryState::Unmapped))) { | ||
| 1015 | return ERR_INVALID_ADDRESS_STATE; | ||
| 1016 | } | ||
| 1017 | |||
| 1018 | // Add size if relevant. | ||
| 1019 | if (state != MemoryState::Unmapped) { | ||
| 1020 | mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr); | ||
| 1021 | } | ||
| 1022 | |||
| 1023 | // Break once we hit the end of the range. | ||
| 1024 | if (last_addr <= vma_last) { | ||
| 1025 | break; | ||
| 1026 | } | ||
| 1027 | |||
| 1028 | // Advance to the next block. | ||
| 1029 | cur_addr = vma_end; | ||
| 1030 | iter = std::next(iter); | ||
| 1031 | ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end"); | ||
| 1032 | } | ||
| 1033 | |||
| 1034 | return MakeResult(mapped_size); | ||
| 1035 | } | ||
| 1036 | |||
| 761 | u64 VMManager::GetTotalPhysicalMemoryAvailable() const { | 1037 | u64 VMManager::GetTotalPhysicalMemoryAvailable() const { |
| 762 | LOG_WARNING(Kernel, "(STUBBED) called"); | 1038 | LOG_WARNING(Kernel, "(STUBBED) called"); |
| 763 | return 0xF8000000; | 1039 | return 0xF8000000; |
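For readers tracing the MapPhysicalMemory hunk above, the following is a minimal standalone sketch of the same pattern: walk every region overlapping the requested range, map the unmapped gaps, and roll back only what this call mapped if a later step fails. It is not the kernel code; Region, RegionMap, TryMapPhysical and the budget parameter are illustrative stand-ins for the VMA map and the PhysicalMemory resource-limit check.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <map>
#include <utility>
#include <vector>

using VAddr = std::uint64_t;

struct Region {
    std::uint64_t size{};
    bool mapped{};
};

// Regions tile the address space; the key is each region's base address.
using RegionMap = std::map<VAddr, Region>;

bool TryMapPhysical(RegionMap& regions, VAddr target, std::uint64_t size, std::uint64_t budget) {
    const VAddr last_addr = target + size - 1;
    std::vector<std::pair<VAddr, std::uint64_t>> newly_mapped;

    // Assumes 'target' lies inside an existing region, as FindVMA guarantees in the real code.
    auto it = std::prev(regions.upper_bound(target));
    VAddr cur_addr = target;
    bool failed = false;

    while (true) {
        Region& region = it->second;
        const VAddr region_end = it->first + region.size;
        const std::uint64_t chunk = std::min(target + size, region_end) - cur_addr;

        if (!region.mapped) {
            if (chunk > budget) { // simulated failure, e.g. the resource limit was exceeded
                failed = true;
                break;
            }
            budget -= chunk;
            region.mapped = true; // simplified: flips the whole region instead of splitting it
            newly_mapped.emplace_back(it->first, region.size);
        }
        if (last_addr <= region_end - 1) {
            break; // reached the end of the requested range
        }
        cur_addr = region_end;
        ++it;
    }

    if (failed) {
        // Roll back only what this call mapped, mirroring the error path in the hunk above.
        for (const auto& entry : newly_mapped) {
            regions[entry.first].mapped = false;
        }
        return false;
    }
    return true;
}

int main() {
    RegionMap regions{{0x0000, {0x1000, true}}, {0x1000, {0x1000, false}}, {0x2000, {0x1000, false}}};
    std::cout << TryMapPhysical(regions, 0x800, 0x2000, 0x1000) << '\n'; // 0: failed, rolled back
    std::cout << TryMapPhysical(regions, 0x800, 0x2000, 0x4000) << '\n'; // 1: both gaps now mapped
}

The early-out on mapped_size == size and the rollback vector in the real implementation serve the same purpose: the call either maps the whole remainder of the range or leaves the address space untouched.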
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h index 9fe6ac3f4..0aecb7499 100644 --- a/src/core/hle/kernel/vm_manager.h +++ b/src/core/hle/kernel/vm_manager.h | |||
| @@ -349,7 +349,8 @@ public: | |||
| 349 | * @param state MemoryState tag to attach to the VMA. | 349 | * @param state MemoryState tag to attach to the VMA. |
| 350 | */ | 350 | */ |
| 351 | ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block, | 351 | ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block, |
| 352 | std::size_t offset, u64 size, MemoryState state); | 352 | std::size_t offset, u64 size, MemoryState state, |
| 353 | VMAPermission perm = VMAPermission::ReadWrite); | ||
| 353 | 354 | ||
| 354 | /** | 355 | /** |
| 355 | * Maps an unmanaged host memory pointer at a given address. | 356 | * Maps an unmanaged host memory pointer at a given address. |
| @@ -450,6 +451,34 @@ public: | |||
| 450 | /// | 451 | /// |
| 451 | ResultVal<VAddr> SetHeapSize(u64 size); | 452 | ResultVal<VAddr> SetHeapSize(u64 size); |
| 452 | 453 | ||
| 454 | /// Maps memory at a given address. | ||
| 455 | /// | ||
| 456 | /// @param target The virtual address to map memory at. | ||
| 457 | /// @param size The amount of memory to map. | ||
| 458 | /// | ||
| 459 | /// @note The destination address must lie within the Map region. | ||
| 460 | /// | ||
| 461 | /// @note This function requires that SystemResourceSize be non-zero, | ||
| 462 | /// however, this is just because if it were not then the | ||
| 463 | /// resulting page tables could be exploited on hardware by | ||
| 464 | /// a malicious program. SystemResource usage does not need | ||
| 465 | /// to be explicitly checked or updated here. | ||
| 466 | ResultCode MapPhysicalMemory(VAddr target, u64 size); | ||
| 467 | |||
| 468 | /// Unmaps memory at a given address. | ||
| 469 | /// | ||
| 470 | /// @param target The virtual address to unmap memory at. | ||
| 471 | /// @param size The amount of memory to unmap. | ||
| 472 | /// | ||
| 473 | /// @note The destination address must lie within the Map region. | ||
| 474 | /// | ||
| 475 | /// @note This function requires that SystemResourceSize be non-zero, | ||
| 476 | /// however, this is just because if it were not then the | ||
| 477 | /// resulting page tables could be exploited on hardware by | ||
| 478 | /// a malicious program. SystemResource usage does not need | ||
| 479 | /// to be explicitly checked or updated here. | ||
| 480 | ResultCode UnmapPhysicalMemory(VAddr target, u64 size); | ||
| 481 | |||
| 453 | /// Maps a region of memory as code memory. | 482 | /// Maps a region of memory as code memory. |
| 454 | /// | 483 | /// |
| 455 | /// @param dst_address The base address of the region to create the aliasing memory region. | 484 | /// @param dst_address The base address of the region to create the aliasing memory region. |
| @@ -657,6 +686,11 @@ private: | |||
| 657 | */ | 686 | */ |
| 658 | VMAIter MergeAdjacent(VMAIter vma); | 687 | VMAIter MergeAdjacent(VMAIter vma); |
| 659 | 688 | ||
| 689 | /** | ||
| 690 | * Merges the right VMA into the left one, combining their backing blocks when needed. | ||
| 691 | */ | ||
| 692 | void MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right); | ||
| 693 | |||
| 660 | /// Updates the pages corresponding to this VMA so they match the VMA's attributes. | 694 | /// Updates the pages corresponding to this VMA so they match the VMA's attributes. |
| 661 | void UpdatePageTableForVMA(const VirtualMemoryArea& vma); | 695 | void UpdatePageTableForVMA(const VirtualMemoryArea& vma); |
| 662 | 696 | ||
| @@ -701,6 +735,13 @@ private: | |||
| 701 | MemoryAttribute attribute_mask, MemoryAttribute attribute, | 735 | MemoryAttribute attribute_mask, MemoryAttribute attribute, |
| 702 | MemoryAttribute ignore_mask) const; | 736 | MemoryAttribute ignore_mask) const; |
| 703 | 737 | ||
| 738 | /// Gets the amount of memory currently mapped (state != Unmapped) in a range. | ||
| 739 | ResultVal<std::size_t> SizeOfAllocatedVMAsInRange(VAddr address, std::size_t size) const; | ||
| 740 | |||
| 741 | /// Gets the amount of memory unmappable by UnmapPhysicalMemory in a range. | ||
| 742 | ResultVal<std::size_t> SizeOfUnmappablePhysicalMemoryInRange(VAddr address, | ||
| 743 | std::size_t size) const; | ||
| 744 | |||
| 704 | /** | 745 | /** |
| 705 | * A map covering the entirety of the managed address space, keyed by the `base` field of each | 746 | * A map covering the entirety of the managed address space, keyed by the `base` field of each |
| 706 | * VMA. It must always be modified by splitting or merging VMAs, so that the invariant | 747 | * VMA. It must always be modified by splitting or merging VMAs, so that the invariant |
| @@ -742,6 +783,11 @@ private: | |||
| 742 | // end of the range. This is essentially 'base_address + current_size'. | 783 | // end of the range. This is essentially 'base_address + current_size'. |
| 743 | VAddr heap_end = 0; | 784 | VAddr heap_end = 0; |
| 744 | 785 | ||
| 786 | // The current amount of memory mapped via MapPhysicalMemory. | ||
| 787 | // This is used here (and in Nintendo's kernel) only for debugging, and does not impact | ||
| 788 | // any behavior. | ||
| 789 | u64 physical_memory_mapped = 0; | ||
| 790 | |||
| 745 | Core::System& system; | 791 | Core::System& system; |
| 746 | }; | 792 | }; |
| 747 | } // namespace Kernel | 793 | } // namespace Kernel |
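MergeAdjacentVMA, declared above and implemented in the vm_manager.cpp hunk earlier, has two backing-block paths. Below is a compact standalone sketch of just that part, assuming the two VMAs use distinct backing vectors; Vma and MergeBacking are illustrative names, not the emulator's types.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <memory>
#include <utility>
#include <vector>

struct Vma {
    std::shared_ptr<std::vector<std::uint8_t>> backing;
    std::size_t offset{}; // where this VMA's data starts inside 'backing'
    std::size_t size{};
};

// Merge 'right' into 'left'; both are assumed to use distinct backing vectors.
void MergeBacking(Vma& left, const Vma& right) {
    auto& lb = *left.backing;
    const auto& rb = *right.backing;
    if (left.offset == 0 && left.size == lb.size()) {
        // Fast path: 'left' already owns its whole block, so append right's slice in place.
        lb.insert(lb.end(), rb.begin() + right.offset, rb.begin() + right.offset + right.size);
    } else {
        // Slow path: copy both slices into a brand new block.
        auto merged = std::make_shared<std::vector<std::uint8_t>>();
        merged->insert(merged->end(), lb.begin() + left.offset, lb.begin() + left.offset + left.size);
        merged->insert(merged->end(), rb.begin() + right.offset, rb.begin() + right.offset + right.size);
        left.backing = std::move(merged);
        left.offset = 0;
    }
    left.size += right.size;
}

int main() {
    Vma a{std::make_shared<std::vector<std::uint8_t>>(std::vector<std::uint8_t>{1, 2}), 0, 2};
    Vma b{std::make_shared<std::vector<std::uint8_t>>(std::vector<std::uint8_t>{9, 3, 4}), 1, 2};
    MergeBacking(a, b); // fast path: a's backing becomes {1, 2, 3, 4}
    std::cout << a.backing->size() << '\n'; // 4
}

In both paths the backing storage (or its location in host memory) can change, which is why the real code follows the merge with UpdatePageTableForVMA.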
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index cd32c65d3..7c18c27b3 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -1,4 +1,5 @@ | |||
| 1 | add_library(video_core STATIC | 1 | add_library(video_core STATIC |
| 2 | buffer_cache.h | ||
| 2 | dma_pusher.cpp | 3 | dma_pusher.cpp |
| 3 | dma_pusher.h | 4 | dma_pusher.h |
| 4 | debug_utils/debug_utils.cpp | 5 | debug_utils/debug_utils.cpp |
| @@ -43,8 +44,6 @@ add_library(video_core STATIC | |||
| 43 | renderer_opengl/gl_device.h | 44 | renderer_opengl/gl_device.h |
| 44 | renderer_opengl/gl_framebuffer_cache.cpp | 45 | renderer_opengl/gl_framebuffer_cache.cpp |
| 45 | renderer_opengl/gl_framebuffer_cache.h | 46 | renderer_opengl/gl_framebuffer_cache.h |
| 46 | renderer_opengl/gl_global_cache.cpp | ||
| 47 | renderer_opengl/gl_global_cache.h | ||
| 48 | renderer_opengl/gl_rasterizer.cpp | 47 | renderer_opengl/gl_rasterizer.cpp |
| 49 | renderer_opengl/gl_rasterizer.h | 48 | renderer_opengl/gl_rasterizer.h |
| 50 | renderer_opengl/gl_resource_manager.cpp | 49 | renderer_opengl/gl_resource_manager.cpp |
diff --git a/src/video_core/buffer_cache.h b/src/video_core/buffer_cache.h new file mode 100644 index 000000000..6f868b8b4 --- /dev/null +++ b/src/video_core/buffer_cache.h | |||
| @@ -0,0 +1,299 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <memory> | ||
| 9 | #include <mutex> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <unordered_set> | ||
| 12 | #include <utility> | ||
| 13 | #include <vector> | ||
| 14 | |||
| 15 | #include "common/alignment.h" | ||
| 16 | #include "common/common_types.h" | ||
| 17 | #include "core/core.h" | ||
| 18 | #include "video_core/memory_manager.h" | ||
| 19 | #include "video_core/rasterizer_cache.h" | ||
| 20 | |||
| 21 | namespace VideoCore { | ||
| 22 | class RasterizerInterface; | ||
| 23 | } | ||
| 24 | |||
| 25 | namespace VideoCommon { | ||
| 26 | |||
| 27 | template <typename BufferStorageType> | ||
| 28 | class CachedBuffer final : public RasterizerCacheObject { | ||
| 29 | public: | ||
| 30 | explicit CachedBuffer(VAddr cpu_addr, u8* host_ptr) | ||
| 31 | : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {} | ||
| 32 | ~CachedBuffer() override = default; | ||
| 33 | |||
| 34 | VAddr GetCpuAddr() const override { | ||
| 35 | return cpu_addr; | ||
| 36 | } | ||
| 37 | |||
| 38 | std::size_t GetSizeInBytes() const override { | ||
| 39 | return size; | ||
| 40 | } | ||
| 41 | |||
| 42 | u8* GetWritableHostPtr() const { | ||
| 43 | return host_ptr; | ||
| 44 | } | ||
| 45 | |||
| 46 | std::size_t GetSize() const { | ||
| 47 | return size; | ||
| 48 | } | ||
| 49 | |||
| 50 | std::size_t GetCapacity() const { | ||
| 51 | return capacity; | ||
| 52 | } | ||
| 53 | |||
| 54 | bool IsInternalized() const { | ||
| 55 | return is_internal; | ||
| 56 | } | ||
| 57 | |||
| 58 | const BufferStorageType& GetBuffer() const { | ||
| 59 | return buffer; | ||
| 60 | } | ||
| 61 | |||
| 62 | void SetSize(std::size_t new_size) { | ||
| 63 | size = new_size; | ||
| 64 | } | ||
| 65 | |||
| 66 | void SetInternalState(bool is_internal_) { | ||
| 67 | is_internal = is_internal_; | ||
| 68 | } | ||
| 69 | |||
| 70 | BufferStorageType ExchangeBuffer(BufferStorageType buffer_, std::size_t new_capacity) { | ||
| 71 | capacity = new_capacity; | ||
| 72 | std::swap(buffer, buffer_); | ||
| 73 | return buffer_; | ||
| 74 | } | ||
| 75 | |||
| 76 | private: | ||
| 77 | u8* host_ptr{}; | ||
| 78 | VAddr cpu_addr{}; | ||
| 79 | std::size_t size{}; | ||
| 80 | std::size_t capacity{}; | ||
| 81 | bool is_internal{}; | ||
| 82 | BufferStorageType buffer; | ||
| 83 | }; | ||
| 84 | |||
| 85 | template <typename BufferStorageType, typename BufferType, typename StreamBuffer> | ||
| 86 | class BufferCache : public RasterizerCache<std::shared_ptr<CachedBuffer<BufferStorageType>>> { | ||
| 87 | public: | ||
| 88 | using Buffer = std::shared_ptr<CachedBuffer<BufferStorageType>>; | ||
| 89 | using BufferInfo = std::pair<const BufferType*, u64>; | ||
| 90 | |||
| 91 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | ||
| 92 | std::unique_ptr<StreamBuffer> stream_buffer) | ||
| 93 | : RasterizerCache<Buffer>{rasterizer}, system{system}, | ||
| 94 | stream_buffer{std::move(stream_buffer)}, stream_buffer_handle{ | ||
| 95 | this->stream_buffer->GetHandle()} {} | ||
| 96 | ~BufferCache() = default; | ||
| 97 | |||
| 98 | void Unregister(const Buffer& entry) override { | ||
| 99 | std::lock_guard lock{RasterizerCache<Buffer>::mutex}; | ||
| 100 | if (entry->IsInternalized()) { | ||
| 101 | internalized_entries.erase(entry->GetCacheAddr()); | ||
| 102 | } | ||
| 103 | ReserveBuffer(entry); | ||
| 104 | RasterizerCache<Buffer>::Unregister(entry); | ||
| 105 | } | ||
| 106 | |||
| 107 | void TickFrame() { | ||
| 108 | marked_for_destruction_index = | ||
| 109 | (marked_for_destruction_index + 1) % marked_for_destruction_ring_buffer.size(); | ||
| 110 | MarkedForDestruction().clear(); | ||
| 111 | } | ||
| 112 | |||
| 113 | BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, | ||
| 114 | bool internalize = false, bool is_written = false) { | ||
| 115 | std::lock_guard lock{RasterizerCache<Buffer>::mutex}; | ||
| 116 | |||
| 117 | auto& memory_manager = system.GPU().MemoryManager(); | ||
| 118 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 119 | if (!host_ptr) { | ||
| 120 | return {GetEmptyBuffer(size), 0}; | ||
| 121 | } | ||
| 122 | const auto cache_addr = ToCacheAddr(host_ptr); | ||
| 123 | |||
| 124 | // Cache management is a big overhead, so only cache entries above a certain size. | ||
| 125 | // TODO: Figure out which size is the best for given games. | ||
| 126 | constexpr std::size_t max_stream_size = 0x800; | ||
| 127 | if (!internalize && size < max_stream_size && | ||
| 128 | internalized_entries.find(cache_addr) == internalized_entries.end()) { | ||
| 129 | return StreamBufferUpload(host_ptr, size, alignment); | ||
| 130 | } | ||
| 131 | |||
| 132 | auto entry = RasterizerCache<Buffer>::TryGet(cache_addr); | ||
| 133 | if (!entry) { | ||
| 134 | return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written); | ||
| 135 | } | ||
| 136 | |||
| 137 | if (entry->GetSize() < size) { | ||
| 138 | IncreaseBufferSize(entry, size); | ||
| 139 | } | ||
| 140 | if (is_written) { | ||
| 141 | entry->MarkAsModified(true, *this); | ||
| 142 | } | ||
| 143 | return {ToHandle(entry->GetBuffer()), 0}; | ||
| 144 | } | ||
| 145 | |||
| 146 | /// Uploads data from host memory. Returns the backend buffer where it is located and its offset. | ||
| 147 | BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, | ||
| 148 | std::size_t alignment = 4) { | ||
| 149 | std::lock_guard lock{RasterizerCache<Buffer>::mutex}; | ||
| 150 | return StreamBufferUpload(raw_pointer, size, alignment); | ||
| 151 | } | ||
| 152 | |||
| 153 | void Map(std::size_t max_size) { | ||
| 154 | std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); | ||
| 155 | buffer_offset = buffer_offset_base; | ||
| 156 | } | ||
| 157 | |||
| 158 | /// Finishes the upload stream, returns true on bindings invalidation. | ||
| 159 | bool Unmap() { | ||
| 160 | stream_buffer->Unmap(buffer_offset - buffer_offset_base); | ||
| 161 | return std::exchange(invalidated, false); | ||
| 162 | } | ||
| 163 | |||
| 164 | virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0; | ||
| 165 | |||
| 166 | protected: | ||
| 167 | void FlushObjectInner(const Buffer& entry) override { | ||
| 168 | DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr()); | ||
| 169 | } | ||
| 170 | |||
| 171 | virtual BufferStorageType CreateBuffer(std::size_t size) = 0; | ||
| 172 | |||
| 173 | virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0; | ||
| 174 | |||
| 175 | virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset, | ||
| 176 | std::size_t size, const u8* data) = 0; | ||
| 177 | |||
| 178 | virtual void DownloadBufferData(const BufferStorageType& buffer, std::size_t offset, | ||
| 179 | std::size_t size, u8* data) = 0; | ||
| 180 | |||
| 181 | virtual void CopyBufferData(const BufferStorageType& src, const BufferStorageType& dst, | ||
| 182 | std::size_t src_offset, std::size_t dst_offset, | ||
| 183 | std::size_t size) = 0; | ||
| 184 | |||
| 185 | private: | ||
| 186 | BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, | ||
| 187 | std::size_t alignment) { | ||
| 188 | AlignBuffer(alignment); | ||
| 189 | const std::size_t uploaded_offset = buffer_offset; | ||
| 190 | std::memcpy(buffer_ptr, raw_pointer, size); | ||
| 191 | |||
| 192 | buffer_ptr += size; | ||
| 193 | buffer_offset += size; | ||
| 194 | return {&stream_buffer_handle, uploaded_offset}; | ||
| 195 | } | ||
| 196 | |||
| 197 | BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size, | ||
| 198 | bool internalize, bool is_written) { | ||
| 199 | auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); | ||
| 200 | const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); | ||
| 201 | ASSERT(cpu_addr); | ||
| 202 | |||
| 203 | auto entry = GetUncachedBuffer(*cpu_addr, host_ptr); | ||
| 204 | entry->SetSize(size); | ||
| 205 | entry->SetInternalState(internalize); | ||
| 206 | RasterizerCache<Buffer>::Register(entry); | ||
| 207 | |||
| 208 | if (internalize) { | ||
| 209 | internalized_entries.emplace(ToCacheAddr(host_ptr)); | ||
| 210 | } | ||
| 211 | if (is_written) { | ||
| 212 | entry->MarkAsModified(true, *this); | ||
| 213 | } | ||
| 214 | |||
| 215 | if (entry->GetCapacity() < size) { | ||
| 216 | MarkedForDestruction().push_back(entry->ExchangeBuffer(CreateBuffer(size), size)); | ||
| 217 | } | ||
| 218 | |||
| 219 | UploadBufferData(entry->GetBuffer(), 0, size, host_ptr); | ||
| 220 | return {ToHandle(entry->GetBuffer()), 0}; | ||
| 221 | } | ||
| 222 | |||
| 223 | void IncreaseBufferSize(Buffer& entry, std::size_t new_size) { | ||
| 224 | const std::size_t old_size = entry->GetSize(); | ||
| 225 | if (entry->GetCapacity() < new_size) { | ||
| 226 | const auto& old_buffer = entry->GetBuffer(); | ||
| 227 | auto new_buffer = CreateBuffer(new_size); | ||
| 228 | |||
| 229 | // Copy bits from the old buffer to the new buffer. | ||
| 230 | CopyBufferData(old_buffer, new_buffer, 0, 0, old_size); | ||
| 231 | MarkedForDestruction().push_back( | ||
| 232 | entry->ExchangeBuffer(std::move(new_buffer), new_size)); | ||
| 233 | |||
| 234 | // The old buffer may still be bound, so flag bindings as invalidated. | ||
| 235 | invalidated = true; | ||
| 236 | } | ||
| 237 | // Upload the new bits. | ||
| 238 | const std::size_t size_diff = new_size - old_size; | ||
| 239 | UploadBufferData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size); | ||
| 240 | |||
| 241 | // Update entry's size in the object and in the cache. | ||
| 242 | Unregister(entry); | ||
| 243 | |||
| 244 | entry->SetSize(new_size); | ||
| 245 | RasterizerCache<Buffer>::Register(entry); | ||
| 246 | } | ||
| 247 | |||
| 248 | Buffer GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) { | ||
| 249 | if (auto entry = TryGetReservedBuffer(host_ptr)) { | ||
| 250 | return entry; | ||
| 251 | } | ||
| 252 | return std::make_shared<CachedBuffer<BufferStorageType>>(cpu_addr, host_ptr); | ||
| 253 | } | ||
| 254 | |||
| 255 | Buffer TryGetReservedBuffer(u8* host_ptr) { | ||
| 256 | const auto it = buffer_reserve.find(ToCacheAddr(host_ptr)); | ||
| 257 | if (it == buffer_reserve.end()) { | ||
| 258 | return {}; | ||
| 259 | } | ||
| 260 | auto& reserve = it->second; | ||
| 261 | auto entry = reserve.back(); | ||
| 262 | reserve.pop_back(); | ||
| 263 | return entry; | ||
| 264 | } | ||
| 265 | |||
| 266 | void ReserveBuffer(Buffer entry) { | ||
| 267 | buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry)); | ||
| 268 | } | ||
| 269 | |||
| 270 | void AlignBuffer(std::size_t alignment) { | ||
| 271 | // Align the offset, not the mapped pointer | ||
| 272 | const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); | ||
| 273 | buffer_ptr += offset_aligned - buffer_offset; | ||
| 274 | buffer_offset = offset_aligned; | ||
| 275 | } | ||
| 276 | |||
| 277 | std::vector<BufferStorageType>& MarkedForDestruction() { | ||
| 278 | return marked_for_destruction_ring_buffer[marked_for_destruction_index]; | ||
| 279 | } | ||
| 280 | |||
| 281 | Core::System& system; | ||
| 282 | |||
| 283 | std::unique_ptr<StreamBuffer> stream_buffer; | ||
| 284 | BufferType stream_buffer_handle{}; | ||
| 285 | |||
| 286 | bool invalidated = false; | ||
| 287 | |||
| 288 | u8* buffer_ptr = nullptr; | ||
| 289 | u64 buffer_offset = 0; | ||
| 290 | u64 buffer_offset_base = 0; | ||
| 291 | |||
| 292 | std::size_t marked_for_destruction_index = 0; | ||
| 293 | std::array<std::vector<BufferStorageType>, 4> marked_for_destruction_ring_buffer; | ||
| 294 | |||
| 295 | std::unordered_set<CacheAddr> internalized_entries; | ||
| 296 | std::unordered_map<CacheAddr, std::vector<Buffer>> buffer_reserve; | ||
| 297 | }; | ||
| 298 | |||
| 299 | } // namespace VideoCommon | ||
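TickFrame() and MarkedForDestruction() above implement deferred destruction: buffers replaced during a frame are parked and only released a few frames later, once in-flight GPU work can no longer reference them. A standalone model of that scheme, assuming a fixed ring of four buckets like marked_for_destruction_ring_buffer; DeferredDeleter is an illustrative name, not the emulator's class.

#include <array>
#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

template <typename T>
class DeferredDeleter {
public:
    // Park a retired object; it stays alive until its bucket comes around again.
    void Retire(T object) {
        buckets[index].push_back(std::move(object));
    }

    // Call once per frame: advance to the next bucket and free its contents,
    // which by construction were retired buckets.size() frames ago.
    void TickFrame() {
        index = (index + 1) % buckets.size();
        std::cout << "freeing " << buckets[index].size() << " object(s)\n";
        buckets[index].clear();
    }

private:
    std::size_t index = 0;
    std::array<std::vector<T>, 4> buckets;
};

int main() {
    DeferredDeleter<std::string> deleter;
    deleter.Retire("old vertex buffer");
    for (int frame = 0; frame < 4; ++frame) {
        deleter.TickFrame(); // the retired string is only freed on the fourth tick
    }
}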
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 13e314944..8d15c8a48 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -67,6 +67,7 @@ public: | |||
| 67 | static constexpr std::size_t MaxShaderStage = 5; | 67 | static constexpr std::size_t MaxShaderStage = 5; |
| 68 | // Maximum number of const buffers per shader stage. | 68 | // Maximum number of const buffers per shader stage. |
| 69 | static constexpr std::size_t MaxConstBuffers = 18; | 69 | static constexpr std::size_t MaxConstBuffers = 18; |
| 70 | static constexpr std::size_t MaxConstBufferSize = 0x10000; | ||
| 70 | 71 | ||
| 71 | enum class QueryMode : u32 { | 72 | enum class QueryMode : u32 { |
| 72 | Write = 0, | 73 | Write = 0, |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index c3055602b..79d469b88 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -78,7 +78,7 @@ union Attribute { | |||
| 78 | constexpr explicit Attribute(u64 value) : value(value) {} | 78 | constexpr explicit Attribute(u64 value) : value(value) {} |
| 79 | 79 | ||
| 80 | enum class Index : u64 { | 80 | enum class Index : u64 { |
| 81 | PointSize = 6, | 81 | LayerViewportPointSize = 6, |
| 82 | Position = 7, | 82 | Position = 7, |
| 83 | Attribute_0 = 8, | 83 | Attribute_0 = 8, |
| 84 | Attribute_31 = 39, | 84 | Attribute_31 = 39, |
| @@ -1278,6 +1278,7 @@ union Instruction { | |||
| 1278 | union { | 1278 | union { |
| 1279 | BitField<49, 1, u64> nodep_flag; | 1279 | BitField<49, 1, u64> nodep_flag; |
| 1280 | BitField<53, 4, u64> texture_info; | 1280 | BitField<53, 4, u64> texture_info; |
| 1281 | BitField<59, 1, u64> fp32_flag; | ||
| 1281 | 1282 | ||
| 1282 | TextureType GetTextureType() const { | 1283 | TextureType GetTextureType() const { |
| 1283 | // The TLDS instruction has a weird encoding for the texture type. | 1284 | // The TLDS instruction has a weird encoding for the texture type. |
| @@ -1776,7 +1777,7 @@ private: | |||
| 1776 | INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), | 1777 | INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), |
| 1777 | INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), | 1778 | INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), |
| 1778 | INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), | 1779 | INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), |
| 1779 | INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), | 1780 | INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), |
| 1780 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), | 1781 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), |
| 1781 | INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), | 1782 | INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), |
| 1782 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), | 1783 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), |
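The TLDS fix above changes a single character of the pattern string ("1101101..." to "1101-01..."), turning a fixed bit into a wildcard. As a rough illustration of how such strings behave as (mask, expected) pairs, here is a standalone sketch; Compile and Matches are illustrative helpers, not the decoder's actual implementation.

#include <cstdint>
#include <iostream>
#include <string_view>

struct Pattern {
    std::uint16_t mask{};
    std::uint16_t expected{};
};

// Build a (mask, expected) pair from a pattern string: '1' and '0' are required
// bits, '-' marks a position the matcher ignores.
constexpr Pattern Compile(std::string_view bits) {
    Pattern p{};
    for (const char c : bits) {
        p.mask = static_cast<std::uint16_t>(p.mask << 1);
        p.expected = static_cast<std::uint16_t>(p.expected << 1);
        if (c == '1') {
            p.mask |= 1;
            p.expected |= 1;
        } else if (c == '0') {
            p.mask |= 1;
        }
    }
    return p;
}

constexpr bool Matches(Pattern p, std::uint16_t top_bits) {
    return (top_bits & p.mask) == p.expected;
}

int main() {
    constexpr Pattern tlds = Compile("1101-01---------");
    std::cout << Matches(tlds, 0b1101'0010'0000'0000) << '\n'; // 1: the '-' position is ignored
    std::cout << Matches(tlds, 0b1101'1010'0000'0000) << '\n'; // 1: same, with the ignored bit set
    std::cout << Matches(tlds, 0b1101'1000'0000'0000) << '\n'; // 0: the required "01" bits differ
}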
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 52706505b..1b4975498 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -31,7 +31,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { | |||
| 31 | 31 | ||
| 32 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { | 32 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { |
| 33 | auto& rasterizer{renderer.Rasterizer()}; | 33 | auto& rasterizer{renderer.Rasterizer()}; |
| 34 | memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer); | 34 | memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); |
| 35 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); | 35 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); |
| 36 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); | 36 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); |
| 37 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); | 37 | fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); |
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index c766ed692..9f59a2dc1 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp | |||
| @@ -4,14 +4,18 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "common/microprofile.h" | ||
| 7 | #include "video_core/engines/maxwell_3d.h" | 8 | #include "video_core/engines/maxwell_3d.h" |
| 8 | #include "video_core/macro_interpreter.h" | 9 | #include "video_core/macro_interpreter.h" |
| 9 | 10 | ||
| 11 | MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); | ||
| 12 | |||
| 10 | namespace Tegra { | 13 | namespace Tegra { |
| 11 | 14 | ||
| 12 | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} | 15 | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} |
| 13 | 16 | ||
| 14 | void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { | 17 | void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { |
| 18 | MICROPROFILE_SCOPE(MacroInterp); | ||
| 15 | Reset(); | 19 | Reset(); |
| 16 | registers[1] = parameters[0]; | 20 | registers[1] = parameters[0]; |
| 17 | this->parameters = std::move(parameters); | 21 | this->parameters = std::move(parameters); |
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 322453116..bffae940c 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -5,13 +5,17 @@ | |||
| 5 | #include "common/alignment.h" | 5 | #include "common/alignment.h" |
| 6 | #include "common/assert.h" | 6 | #include "common/assert.h" |
| 7 | #include "common/logging/log.h" | 7 | #include "common/logging/log.h" |
| 8 | #include "core/core.h" | ||
| 9 | #include "core/hle/kernel/process.h" | ||
| 10 | #include "core/hle/kernel/vm_manager.h" | ||
| 8 | #include "core/memory.h" | 11 | #include "core/memory.h" |
| 9 | #include "video_core/memory_manager.h" | 12 | #include "video_core/memory_manager.h" |
| 10 | #include "video_core/rasterizer_interface.h" | 13 | #include "video_core/rasterizer_interface.h" |
| 11 | 14 | ||
| 12 | namespace Tegra { | 15 | namespace Tegra { |
| 13 | 16 | ||
| 14 | MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} { | 17 | MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer) |
| 18 | : rasterizer{rasterizer}, system{system} { | ||
| 15 | std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); | 19 | std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); |
| 16 | std::fill(page_table.attributes.begin(), page_table.attributes.end(), | 20 | std::fill(page_table.attributes.begin(), page_table.attributes.end(), |
| 17 | Common::PageType::Unmapped); | 21 | Common::PageType::Unmapped); |
| @@ -49,6 +53,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) { | |||
| 49 | const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; | 53 | const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; |
| 50 | 54 | ||
| 51 | MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); | 55 | MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); |
| 56 | ASSERT(system.CurrentProcess() | ||
| 57 | ->VMManager() | ||
| 58 | .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped, | ||
| 59 | Kernel::MemoryAttribute::DeviceMapped) | ||
| 60 | .IsSuccess()); | ||
| 52 | 61 | ||
| 53 | return gpu_addr; | 62 | return gpu_addr; |
| 54 | } | 63 | } |
| @@ -59,7 +68,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) | |||
| 59 | const u64 aligned_size{Common::AlignUp(size, page_size)}; | 68 | const u64 aligned_size{Common::AlignUp(size, page_size)}; |
| 60 | 69 | ||
| 61 | MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); | 70 | MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); |
| 62 | 71 | ASSERT(system.CurrentProcess() | |
| 72 | ->VMManager() | ||
| 73 | .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped, | ||
| 74 | Kernel::MemoryAttribute::DeviceMapped) | ||
| 75 | .IsSuccess()); | ||
| 63 | return gpu_addr; | 76 | return gpu_addr; |
| 64 | } | 77 | } |
| 65 | 78 | ||
| @@ -68,9 +81,16 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { | |||
| 68 | 81 | ||
| 69 | const u64 aligned_size{Common::AlignUp(size, page_size)}; | 82 | const u64 aligned_size{Common::AlignUp(size, page_size)}; |
| 70 | const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; | 83 | const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; |
| 84 | const auto cpu_addr = GpuToCpuAddress(gpu_addr); | ||
| 85 | ASSERT(cpu_addr); | ||
| 71 | 86 | ||
| 72 | rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); | 87 | rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); |
| 73 | UnmapRange(gpu_addr, aligned_size); | 88 | UnmapRange(gpu_addr, aligned_size); |
| 89 | ASSERT(system.CurrentProcess() | ||
| 90 | ->VMManager() | ||
| 91 | .SetMemoryAttribute(cpu_addr.value(), size, Kernel::MemoryAttribute::DeviceMapped, | ||
| 92 | Kernel::MemoryAttribute::None) | ||
| 93 | .IsSuccess()); | ||
| 74 | 94 | ||
| 75 | return gpu_addr; | 95 | return gpu_addr; |
| 76 | } | 96 | } |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 43a84bd52..aea010087 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -14,6 +14,10 @@ namespace VideoCore { | |||
| 14 | class RasterizerInterface; | 14 | class RasterizerInterface; |
| 15 | } | 15 | } |
| 16 | 16 | ||
| 17 | namespace Core { | ||
| 18 | class System; | ||
| 19 | } | ||
| 20 | |||
| 17 | namespace Tegra { | 21 | namespace Tegra { |
| 18 | 22 | ||
| 19 | /** | 23 | /** |
| @@ -47,7 +51,7 @@ struct VirtualMemoryArea { | |||
| 47 | 51 | ||
| 48 | class MemoryManager final { | 52 | class MemoryManager final { |
| 49 | public: | 53 | public: |
| 50 | explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer); | 54 | explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer); |
| 51 | ~MemoryManager(); | 55 | ~MemoryManager(); |
| 52 | 56 | ||
| 53 | GPUVAddr AllocateSpace(u64 size, u64 align); | 57 | GPUVAddr AllocateSpace(u64 size, u64 align); |
| @@ -173,6 +177,8 @@ private: | |||
| 173 | Common::PageTable page_table{page_bits}; | 177 | Common::PageTable page_table{page_bits}; |
| 174 | VMAMap vma_map; | 178 | VMAMap vma_map; |
| 175 | VideoCore::RasterizerInterface& rasterizer; | 179 | VideoCore::RasterizerInterface& rasterizer; |
| 180 | |||
| 181 | Core::System& system; | ||
| 176 | }; | 182 | }; |
| 177 | 183 | ||
| 178 | } // namespace Tegra | 184 | } // namespace Tegra |
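MapBufferEx and UnmapBuffer in the memory_manager.cpp hunk above now tag and untag the CPU range with MemoryAttribute::DeviceMapped through VMManager::SetMemoryAttribute. Below is a small sketch of the mask/value update those calls appear to rely on, under the assumption that bits selected by the mask are replaced by the value while other bits are preserved; the enum values shown are illustrative, not the ones in vm_manager.h.

#include <cstdint>
#include <iostream>

// Illustrative attribute bits; the real enum lives in vm_manager.h with its own values.
enum class MemoryAttribute : std::uint32_t {
    None = 0,
    Locked = 1U << 0,
    DeviceMapped = 1U << 2,
};

constexpr std::uint32_t Bits(MemoryAttribute a) {
    return static_cast<std::uint32_t>(a);
}

// Assumed semantics: bits selected by 'mask' are replaced with 'value', the rest are kept.
constexpr std::uint32_t Update(std::uint32_t attribute, MemoryAttribute mask, MemoryAttribute value) {
    return (attribute & ~Bits(mask)) | Bits(value);
}

int main() {
    std::uint32_t attr = Bits(MemoryAttribute::Locked);
    attr = Update(attr, MemoryAttribute::DeviceMapped, MemoryAttribute::DeviceMapped); // map
    std::cout << attr << '\n'; // 5: Locked | DeviceMapped
    attr = Update(attr, MemoryAttribute::DeviceMapped, MemoryAttribute::None); // unmap
    std::cout << attr << '\n'; // 1: Locked only
}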
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 5ee4f8e8e..2b7367568 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -47,6 +47,9 @@ public: | |||
| 47 | /// and invalidated | 47 | /// and invalidated |
| 48 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; | 48 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; |
| 49 | 49 | ||
| 50 | /// Notify rasterizer that a frame is about to finish | ||
| 51 | virtual void TickFrame() = 0; | ||
| 52 | |||
| 50 | /// Attempt to use a faster method to perform a surface copy | 53 | /// Attempt to use a faster method to perform a surface copy |
| 51 | virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 54 | virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 52 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 55 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 2b9bd142e..2a9b523f5 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -2,103 +2,57 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cstring> | ||
| 6 | #include <memory> | 5 | #include <memory> |
| 7 | 6 | ||
| 8 | #include "common/alignment.h" | 7 | #include <glad/glad.h> |
| 9 | #include "core/core.h" | 8 | |
| 10 | #include "video_core/memory_manager.h" | 9 | #include "common/assert.h" |
| 11 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 10 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 12 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 11 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 13 | 13 | ||
| 14 | namespace OpenGL { | 14 | namespace OpenGL { |
| 15 | 15 | ||
| 16 | CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, | 16 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| 17 | std::size_t alignment, u8* host_ptr) | 17 | std::size_t stream_size) |
| 18 | : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, | 18 | : VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer>{ |
| 19 | alignment{alignment} {} | 19 | rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {} |
| 20 | |||
| 21 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) | ||
| 22 | : RasterizerCache{rasterizer}, stream_buffer(size, true) {} | ||
| 23 | |||
| 24 | GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment, | ||
| 25 | bool cache) { | ||
| 26 | std::lock_guard lock{mutex}; | ||
| 27 | auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); | ||
| 28 | |||
| 29 | // Cache management is a big overhead, so only cache entries with a given size. | ||
| 30 | // TODO: Figure out which size is the best for given games. | ||
| 31 | cache &= size >= 2048; | ||
| 32 | |||
| 33 | const auto& host_ptr{memory_manager.GetPointer(gpu_addr)}; | ||
| 34 | if (cache) { | ||
| 35 | auto entry = TryGet(host_ptr); | ||
| 36 | if (entry) { | ||
| 37 | if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { | ||
| 38 | return entry->GetOffset(); | ||
| 39 | } | ||
| 40 | Unregister(entry); | ||
| 41 | } | ||
| 42 | } | ||
| 43 | 20 | ||
| 44 | AlignBuffer(alignment); | 21 | OGLBufferCache::~OGLBufferCache() = default; |
| 45 | const GLintptr uploaded_offset = buffer_offset; | ||
| 46 | 22 | ||
| 47 | if (!host_ptr) { | 23 | OGLBuffer OGLBufferCache::CreateBuffer(std::size_t size) { |
| 48 | return uploaded_offset; | 24 | OGLBuffer buffer; |
| 49 | } | 25 | buffer.Create(); |
| 50 | 26 | glNamedBufferData(buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); | |
| 51 | std::memcpy(buffer_ptr, host_ptr, size); | 27 | return buffer; |
| 52 | buffer_ptr += size; | ||
| 53 | buffer_offset += size; | ||
| 54 | |||
| 55 | if (cache) { | ||
| 56 | auto entry = std::make_shared<CachedBufferEntry>( | ||
| 57 | *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr); | ||
| 58 | Register(entry); | ||
| 59 | } | ||
| 60 | |||
| 61 | return uploaded_offset; | ||
| 62 | } | 28 | } |
| 63 | 29 | ||
| 64 | GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size, | 30 | const GLuint* OGLBufferCache::ToHandle(const OGLBuffer& buffer) { |
| 65 | std::size_t alignment) { | 31 | return &buffer.handle; |
| 66 | std::lock_guard lock{mutex}; | ||
| 67 | AlignBuffer(alignment); | ||
| 68 | std::memcpy(buffer_ptr, raw_pointer, size); | ||
| 69 | const GLintptr uploaded_offset = buffer_offset; | ||
| 70 | |||
| 71 | buffer_ptr += size; | ||
| 72 | buffer_offset += size; | ||
| 73 | return uploaded_offset; | ||
| 74 | } | 32 | } |
| 75 | 33 | ||
| 76 | bool OGLBufferCache::Map(std::size_t max_size) { | 34 | const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) { |
| 77 | bool invalidate; | 35 | static const GLuint null_buffer = 0; |
| 78 | std::tie(buffer_ptr, buffer_offset_base, invalidate) = | 36 | return &null_buffer; |
| 79 | stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); | ||
| 80 | buffer_offset = buffer_offset_base; | ||
| 81 | |||
| 82 | if (invalidate) { | ||
| 83 | InvalidateAll(); | ||
| 84 | } | ||
| 85 | return invalidate; | ||
| 86 | } | 37 | } |
| 87 | 38 | ||
| 88 | void OGLBufferCache::Unmap() { | 39 | void OGLBufferCache::UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, |
| 89 | stream_buffer.Unmap(buffer_offset - buffer_offset_base); | 40 | const u8* data) { |
| 41 | glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), | ||
| 42 | static_cast<GLsizeiptr>(size), data); | ||
| 90 | } | 43 | } |
| 91 | 44 | ||
| 92 | GLuint OGLBufferCache::GetHandle() const { | 45 | void OGLBufferCache::DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, |
| 93 | return stream_buffer.GetHandle(); | 46 | std::size_t size, u8* data) { |
| 47 | glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), | ||
| 48 | static_cast<GLsizeiptr>(size), data); | ||
| 94 | } | 49 | } |
| 95 | 50 | ||
| 96 | void OGLBufferCache::AlignBuffer(std::size_t alignment) { | 51 | void OGLBufferCache::CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, |
| 97 | // Align the offset, not the mapped pointer | 52 | std::size_t src_offset, std::size_t dst_offset, |
| 98 | const GLintptr offset_aligned = | 53 | std::size_t size) { |
| 99 | static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment)); | 54 | glCopyNamedBufferSubData(src.handle, dst.handle, static_cast<GLintptr>(src_offset), |
| 100 | buffer_ptr += offset_aligned - buffer_offset; | 55 | static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); |
| 101 | buffer_offset = offset_aligned; | ||
| 102 | } | 56 | } |
| 103 | 57 | ||
| 104 | } // namespace OpenGL | 58 | } // namespace OpenGL |
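The AlignBuffer() helper, both in the removed OpenGL-specific cache above and in the shared VideoCommon::BufferCache, advances the write offset and the mapped pointer by the same amount so the two never drift apart. A standalone sketch of that bookkeeping, with a local AlignUp standing in for Common::AlignUp and an assumed power-of-two alignment.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Local stand-in for Common::AlignUp; assumes 'alignment' is a power of two.
constexpr std::size_t AlignUp(std::size_t value, std::size_t alignment) {
    return (value + alignment - 1) & ~(alignment - 1);
}

int main() {
    std::vector<std::uint8_t> stream(1024);
    std::uint8_t* ptr = stream.data();
    std::size_t offset = 13; // pretend 13 bytes were already written

    const std::size_t alignment = 256; // e.g. a uniform buffer offset alignment
    const std::size_t aligned = AlignUp(offset, alignment);
    ptr += aligned - offset; // advance the pointer by the same amount as the offset
    offset = aligned;

    std::cout << offset << ' ' << (ptr - stream.data()) << '\n'; // 256 256
}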
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index f2347581b..8c8ac4038 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -4,80 +4,44 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <cstddef> | ||
| 8 | #include <memory> | 7 | #include <memory> |
| 9 | #include <tuple> | ||
| 10 | 8 | ||
| 11 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/buffer_cache.h" | ||
| 12 | #include "video_core/rasterizer_cache.h" | 11 | #include "video_core/rasterizer_cache.h" |
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 14 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 13 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| 15 | 14 | ||
| 15 | namespace Core { | ||
| 16 | class System; | ||
| 17 | } | ||
| 18 | |||
| 16 | namespace OpenGL { | 19 | namespace OpenGL { |
| 17 | 20 | ||
| 21 | class OGLStreamBuffer; | ||
| 18 | class RasterizerOpenGL; | 22 | class RasterizerOpenGL; |
| 19 | 23 | ||
| 20 | class CachedBufferEntry final : public RasterizerCacheObject { | 24 | class OGLBufferCache final : public VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer> { |
| 21 | public: | ||
| 22 | explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, | ||
| 23 | std::size_t alignment, u8* host_ptr); | ||
| 24 | |||
| 25 | VAddr GetCpuAddr() const override { | ||
| 26 | return cpu_addr; | ||
| 27 | } | ||
| 28 | |||
| 29 | std::size_t GetSizeInBytes() const override { | ||
| 30 | return size; | ||
| 31 | } | ||
| 32 | |||
| 33 | std::size_t GetSize() const { | ||
| 34 | return size; | ||
| 35 | } | ||
| 36 | |||
| 37 | GLintptr GetOffset() const { | ||
| 38 | return offset; | ||
| 39 | } | ||
| 40 | |||
| 41 | std::size_t GetAlignment() const { | ||
| 42 | return alignment; | ||
| 43 | } | ||
| 44 | |||
| 45 | private: | ||
| 46 | VAddr cpu_addr{}; | ||
| 47 | std::size_t size{}; | ||
| 48 | GLintptr offset{}; | ||
| 49 | std::size_t alignment{}; | ||
| 50 | }; | ||
| 51 | |||
| 52 | class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { | ||
| 53 | public: | 25 | public: |
| 54 | explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size); | 26 | explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| 55 | 27 | std::size_t stream_size); | |
| 56 | /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been | 28 | ~OGLBufferCache(); |
| 57 | /// allocated. | ||
| 58 | GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, | ||
| 59 | bool cache = true); | ||
| 60 | 29 | ||
| 61 | /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. | 30 | const GLuint* GetEmptyBuffer(std::size_t) override; |
| 62 | GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4); | ||
| 63 | |||
| 64 | bool Map(std::size_t max_size); | ||
| 65 | void Unmap(); | ||
| 66 | |||
| 67 | GLuint GetHandle() const; | ||
| 68 | 31 | ||
| 69 | protected: | 32 | protected: |
| 70 | void AlignBuffer(std::size_t alignment); | 33 | OGLBuffer CreateBuffer(std::size_t size) override; |
| 34 | |||
| 35 | const GLuint* ToHandle(const OGLBuffer& buffer) override; | ||
| 71 | 36 | ||
| 72 | // We do not have to flush this cache as things in it are never modified by us. | 37 | void UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, |
| 73 | void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} | 38 | const u8* data) override; |
| 74 | 39 | ||
| 75 | private: | 40 | void DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, |
| 76 | OGLStreamBuffer stream_buffer; | 41 | u8* data) override; |
| 77 | 42 | ||
| 78 | u8* buffer_ptr = nullptr; | 43 | void CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, std::size_t src_offset, |
| 79 | GLintptr buffer_offset = 0; | 44 | std::size_t dst_offset, std::size_t size) override; |
| 80 | GLintptr buffer_offset_base = 0; | ||
| 81 | }; | 45 | }; |
| 82 | 46 | ||
| 83 | } // namespace OpenGL | 47 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index a48e14d2e..85424a4c9 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -24,8 +24,10 @@ T GetInteger(GLenum pname) { | |||
| 24 | 24 | ||
| 25 | Device::Device() { | 25 | Device::Device() { |
| 26 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | 26 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); |
| 27 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | ||
| 27 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | 28 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |
| 28 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); | 29 | max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); |
| 30 | has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; | ||
| 29 | has_variable_aoffi = TestVariableAoffi(); | 31 | has_variable_aoffi = TestVariableAoffi(); |
| 30 | has_component_indexing_bug = TestComponentIndexingBug(); | 32 | has_component_indexing_bug = TestComponentIndexingBug(); |
| 31 | } | 33 | } |
| @@ -34,6 +36,7 @@ Device::Device(std::nullptr_t) { | |||
| 34 | uniform_buffer_alignment = 0; | 36 | uniform_buffer_alignment = 0; |
| 35 | max_vertex_attributes = 16; | 37 | max_vertex_attributes = 16; |
| 36 | max_varyings = 15; | 38 | max_varyings = 15; |
| 39 | has_vertex_viewport_layer = true; | ||
| 37 | has_variable_aoffi = true; | 40 | has_variable_aoffi = true; |
| 38 | has_component_indexing_bug = false; | 41 | has_component_indexing_bug = false; |
| 39 | } | 42 | } |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 8c8c93760..dc883722d 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -18,6 +18,10 @@ public: | |||
| 18 | return uniform_buffer_alignment; | 18 | return uniform_buffer_alignment; |
| 19 | } | 19 | } |
| 20 | 20 | ||
| 21 | std::size_t GetShaderStorageBufferAlignment() const { | ||
| 22 | return shader_storage_alignment; | ||
| 23 | } | ||
| 24 | |||
| 21 | u32 GetMaxVertexAttributes() const { | 25 | u32 GetMaxVertexAttributes() const { |
| 22 | return max_vertex_attributes; | 26 | return max_vertex_attributes; |
| 23 | } | 27 | } |
| @@ -26,6 +30,10 @@ public: | |||
| 26 | return max_varyings; | 30 | return max_varyings; |
| 27 | } | 31 | } |
| 28 | 32 | ||
| 33 | bool HasVertexViewportLayer() const { | ||
| 34 | return has_vertex_viewport_layer; | ||
| 35 | } | ||
| 36 | |||
| 29 | bool HasVariableAoffi() const { | 37 | bool HasVariableAoffi() const { |
| 30 | return has_variable_aoffi; | 38 | return has_variable_aoffi; |
| 31 | } | 39 | } |
| @@ -39,8 +47,10 @@ private: | |||
| 39 | static bool TestComponentIndexingBug(); | 47 | static bool TestComponentIndexingBug(); |
| 40 | 48 | ||
| 41 | std::size_t uniform_buffer_alignment{}; | 49 | std::size_t uniform_buffer_alignment{}; |
| 50 | std::size_t shader_storage_alignment{}; | ||
| 42 | u32 max_vertex_attributes{}; | 51 | u32 max_vertex_attributes{}; |
| 43 | u32 max_varyings{}; | 52 | u32 max_varyings{}; |
| 53 | bool has_vertex_viewport_layer{}; | ||
| 44 | bool has_variable_aoffi{}; | 54 | bool has_variable_aoffi{}; |
| 45 | bool has_component_indexing_bug{}; | 55 | bool has_component_indexing_bug{}; |
| 46 | }; | 56 | }; |
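Device now also caches the shader-storage offset alignment at construction time, using the same GetInteger<T> helper named in the hunk header above. A plausible shape of that helper, assuming it simply wraps glGetIntegerv and casts the result, is:

    #include <glad/glad.h>

    // Minimal sketch: query a single integer limit and cast it to the requested type.
    template <typename T>
    T GetInteger(GLenum pname) {
        GLint value;
        glGetIntegerv(pname, &value);
        return static_cast<T>(value);
    }

    // Usage as in Device::Device():
    //   shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);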
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp deleted file mode 100644 index d5e385151..000000000 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ /dev/null | |||
| @@ -1,102 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <glad/glad.h> | ||
| 6 | |||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "core/core.h" | ||
| 9 | #include "video_core/memory_manager.h" | ||
| 10 | #include "video_core/renderer_opengl/gl_global_cache.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_rasterizer.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||
| 13 | #include "video_core/renderer_opengl/utils.h" | ||
| 14 | |||
| 15 | namespace OpenGL { | ||
| 16 | |||
| 17 | CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size) | ||
| 18 | : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size}, | ||
| 19 | max_size{max_size} { | ||
| 20 | buffer.Create(); | ||
| 21 | LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory"); | ||
| 22 | } | ||
| 23 | |||
| 24 | CachedGlobalRegion::~CachedGlobalRegion() = default; | ||
| 25 | |||
| 26 | void CachedGlobalRegion::Reload(u32 size_) { | ||
| 27 | size = size_; | ||
| 28 | if (size > max_size) { | ||
| 29 | size = max_size; | ||
| 30 | LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_, | ||
| 31 | max_size); | ||
| 32 | } | ||
| 33 | glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW); | ||
| 34 | } | ||
| 35 | |||
| 36 | void CachedGlobalRegion::Flush() { | ||
| 37 | LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr); | ||
| 38 | glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr); | ||
| 39 | } | ||
| 40 | |||
| 41 | GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const { | ||
| 42 | const auto search{reserve.find(addr)}; | ||
| 43 | if (search == reserve.end()) { | ||
| 44 | return {}; | ||
| 45 | } | ||
| 46 | return search->second; | ||
| 47 | } | ||
| 48 | |||
| 49 | GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, | ||
| 50 | u32 size) { | ||
| 51 | GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)}; | ||
| 52 | if (!region) { | ||
| 53 | // No reserved surface available, create a new one and reserve it | ||
| 54 | auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; | ||
| 55 | const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)}; | ||
| 56 | ASSERT(cpu_addr); | ||
| 57 | |||
| 58 | region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size); | ||
| 59 | ReserveGlobalRegion(region); | ||
| 60 | } | ||
| 61 | region->Reload(size); | ||
| 62 | return region; | ||
| 63 | } | ||
| 64 | |||
| 65 | void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { | ||
| 66 | reserve.insert_or_assign(region->GetCacheAddr(), std::move(region)); | ||
| 67 | } | ||
| 68 | |||
| 69 | GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) | ||
| 70 | : RasterizerCache{rasterizer} { | ||
| 71 | GLint max_ssbo_size_; | ||
| 72 | glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_); | ||
| 73 | max_ssbo_size = static_cast<u32>(max_ssbo_size_); | ||
| 74 | } | ||
| 75 | |||
| 76 | GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( | ||
| 77 | const GLShader::GlobalMemoryEntry& global_region, | ||
| 78 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { | ||
| 79 | std::lock_guard lock{mutex}; | ||
| 80 | |||
| 81 | auto& gpu{Core::System::GetInstance().GPU()}; | ||
| 82 | auto& memory_manager{gpu.MemoryManager()}; | ||
| 83 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; | ||
| 84 | const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address + | ||
| 85 | global_region.GetCbufOffset()}; | ||
| 86 | const auto actual_addr{memory_manager.Read<u64>(addr)}; | ||
| 87 | const auto size{memory_manager.Read<u32>(addr + 8)}; | ||
| 88 | |||
| 89 | // Look up global region in the cache based on address | ||
| 90 | const auto& host_ptr{memory_manager.GetPointer(actual_addr)}; | ||
| 91 | GlobalRegion region{TryGet(host_ptr)}; | ||
| 92 | |||
| 93 | if (!region) { | ||
| 94 | // No global region found - create a new one | ||
| 95 | region = GetUncachedGlobalRegion(actual_addr, host_ptr, size); | ||
| 96 | Register(region); | ||
| 97 | } | ||
| 98 | |||
| 99 | return region; | ||
| 100 | } | ||
| 101 | |||
| 102 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h deleted file mode 100644 index 2d467a240..000000000 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ /dev/null | |||
| @@ -1,82 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <unordered_map> | ||
| 9 | |||
| 10 | #include <glad/glad.h> | ||
| 11 | |||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | ||
| 15 | #include "video_core/rasterizer_cache.h" | ||
| 16 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 17 | |||
| 18 | namespace OpenGL { | ||
| 19 | |||
| 20 | namespace GLShader { | ||
| 21 | class GlobalMemoryEntry; | ||
| 22 | } | ||
| 23 | |||
| 24 | class RasterizerOpenGL; | ||
| 25 | class CachedGlobalRegion; | ||
| 26 | using GlobalRegion = std::shared_ptr<CachedGlobalRegion>; | ||
| 27 | |||
| 28 | class CachedGlobalRegion final : public RasterizerCacheObject { | ||
| 29 | public: | ||
| 30 | explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size); | ||
| 31 | ~CachedGlobalRegion(); | ||
| 32 | |||
| 33 | VAddr GetCpuAddr() const override { | ||
| 34 | return cpu_addr; | ||
| 35 | } | ||
| 36 | |||
| 37 | std::size_t GetSizeInBytes() const override { | ||
| 38 | return size; | ||
| 39 | } | ||
| 40 | |||
| 41 | /// Gets the GL program handle for the buffer | ||
| 42 | GLuint GetBufferHandle() const { | ||
| 43 | return buffer.handle; | ||
| 44 | } | ||
| 45 | |||
| 46 | /// Reloads the global region from guest memory | ||
| 47 | void Reload(u32 size_); | ||
| 48 | |||
| 49 | void Flush(); | ||
| 50 | |||
| 51 | private: | ||
| 52 | VAddr cpu_addr{}; | ||
| 53 | u8* host_ptr{}; | ||
| 54 | u32 size{}; | ||
| 55 | u32 max_size{}; | ||
| 56 | |||
| 57 | OGLBuffer buffer; | ||
| 58 | }; | ||
| 59 | |||
| 60 | class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> { | ||
| 61 | public: | ||
| 62 | explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer); | ||
| 63 | |||
| 64 | /// Gets the current specified shader stage program | ||
| 65 | GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor, | ||
| 66 | Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); | ||
| 67 | |||
| 68 | protected: | ||
| 69 | void FlushObjectInner(const GlobalRegion& object) override { | ||
| 70 | object->Flush(); | ||
| 71 | } | ||
| 72 | |||
| 73 | private: | ||
| 74 | GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const; | ||
| 75 | GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size); | ||
| 76 | void ReserveGlobalRegion(GlobalRegion region); | ||
| 77 | |||
| 78 | std::unordered_map<CacheAddr, GlobalRegion> reserve; | ||
| 79 | u32 max_ssbo_size{}; | ||
| 80 | }; | ||
| 81 | |||
| 82 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f45a3c5ef..0bb5c068c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "core/hle/kernel/process.h" | 20 | #include "core/hle/kernel/process.h" |
| 21 | #include "core/settings.h" | 21 | #include "core/settings.h" |
| 22 | #include "video_core/engines/maxwell_3d.h" | 22 | #include "video_core/engines/maxwell_3d.h" |
| 23 | #include "video_core/memory_manager.h" | ||
| 23 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 24 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 24 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 25 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 26 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| @@ -80,11 +81,25 @@ struct DrawParameters { | |||
| 80 | } | 81 | } |
| 81 | }; | 82 | }; |
| 82 | 83 | ||
| 84 | static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, | ||
| 85 | const GLShader::ConstBufferEntry& entry) { | ||
| 86 | if (!entry.IsIndirect()) { | ||
| 87 | return entry.GetSize(); | ||
| 88 | } | ||
| 89 | |||
| 90 | if (buffer.size > Maxwell::MaxConstBufferSize) { | ||
| 91 | LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size, | ||
| 92 | Maxwell::MaxConstBufferSize); | ||
| 93 | return Maxwell::MaxConstBufferSize; | ||
| 94 | } | ||
| 95 | |||
| 96 | return buffer.size; | ||
| 97 | } | ||
| 98 | |||
| 83 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | 99 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, |
| 84 | ScreenInfo& info) | 100 | ScreenInfo& info) |
| 85 | : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, | 101 | : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, |
| 86 | global_cache{*this}, system{system}, screen_info{info}, | 102 | system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} { |
| 87 | buffer_cache(*this, STREAM_BUFFER_SIZE) { | ||
| 88 | OpenGLState::ApplyDefaultState(); | 103 | OpenGLState::ApplyDefaultState(); |
| 89 | 104 | ||
| 90 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); | 105 | shader_program_manager = std::make_unique<GLShader::ProgramManager>(); |
| @@ -129,8 +144,6 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 129 | state.draw.vertex_array = vao; | 144 | state.draw.vertex_array = vao; |
| 130 | state.ApplyVertexArrayState(); | 145 | state.ApplyVertexArrayState(); |
| 131 | 146 | ||
| 132 | glVertexArrayElementBuffer(vao, buffer_cache.GetHandle()); | ||
| 133 | |||
| 134 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. | 147 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. |
| 135 | // Enables the first 16 vertex attributes always, as we don't know which ones are actually | 148 | // Enables the first 16 vertex attributes always, as we don't know which ones are actually |
| 136 | // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 | 149 | // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 |
| @@ -197,11 +210,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | |||
| 197 | 210 | ||
| 198 | ASSERT(end > start); | 211 | ASSERT(end > start); |
| 199 | const u64 size = end - start + 1; | 212 | const u64 size = end - start + 1; |
| 200 | const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size); | 213 | const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); |
| 201 | 214 | ||
| 202 | // Bind the vertex array to the buffer at the current offset. | 215 | // Bind the vertex array to the buffer at the current offset. |
| 203 | glVertexArrayVertexBuffer(vao, index, buffer_cache.GetHandle(), vertex_buffer_offset, | 216 | vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset, |
| 204 | vertex_array.stride); | 217 | vertex_array.stride); |
| 205 | 218 | ||
| 206 | if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { | 219 | if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { |
| 207 | // Enable vertex buffer instancing with the specified divisor. | 220 | // Enable vertex buffer instancing with the specified divisor. |
| @@ -215,7 +228,19 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { | |||
| 215 | gpu.dirty_flags.vertex_array.reset(); | 228 | gpu.dirty_flags.vertex_array.reset(); |
| 216 | } | 229 | } |
| 217 | 230 | ||
| 218 | DrawParameters RasterizerOpenGL::SetupDraw() { | 231 | GLintptr RasterizerOpenGL::SetupIndexBuffer() { |
| 232 | if (accelerate_draw != AccelDraw::Indexed) { | ||
| 233 | return 0; | ||
| 234 | } | ||
| 235 | MICROPROFILE_SCOPE(OpenGL_Index); | ||
| 236 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 237 | const std::size_t size = CalculateIndexBufferSize(); | ||
| 238 | const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); | ||
| 239 | vertex_array_pushbuffer.SetIndexBuffer(buffer); | ||
| 240 | return offset; | ||
| 241 | } | ||
| 242 | |||
| 243 | DrawParameters RasterizerOpenGL::SetupDraw(GLintptr index_buffer_offset) { | ||
| 219 | const auto& gpu = system.GPU().Maxwell3D(); | 244 | const auto& gpu = system.GPU().Maxwell3D(); |
| 220 | const auto& regs = gpu.regs; | 245 | const auto& regs = gpu.regs; |
| 221 | const bool is_indexed = accelerate_draw == AccelDraw::Indexed; | 246 | const bool is_indexed = accelerate_draw == AccelDraw::Indexed; |
| @@ -227,11 +252,9 @@ DrawParameters RasterizerOpenGL::SetupDraw() { | |||
| 227 | params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); | 252 | params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); |
| 228 | 253 | ||
| 229 | if (is_indexed) { | 254 | if (is_indexed) { |
| 230 | MICROPROFILE_SCOPE(OpenGL_Index); | ||
| 231 | params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); | 255 | params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); |
| 232 | params.count = regs.index_array.count; | 256 | params.count = regs.index_array.count; |
| 233 | params.index_buffer_offset = | 257 | params.index_buffer_offset = index_buffer_offset; |
| 234 | buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize()); | ||
| 235 | params.base_vertex = static_cast<GLint>(regs.vb_element_base); | 258 | params.base_vertex = static_cast<GLint>(regs.vb_element_base); |
| 236 | } else { | 259 | } else { |
| 237 | params.count = regs.vertex_buffer.count; | 260 | params.count = regs.vertex_buffer.count; |
| @@ -247,10 +270,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 247 | BaseBindings base_bindings; | 270 | BaseBindings base_bindings; |
| 248 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 271 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 249 | 272 | ||
| 250 | // Prepare packed bindings | ||
| 251 | bind_ubo_pushbuffer.Setup(base_bindings.cbuf); | ||
| 252 | bind_ssbo_pushbuffer.Setup(base_bindings.gmem); | ||
| 253 | |||
| 254 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 273 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 255 | const auto& shader_config = gpu.regs.shader_config[index]; | 274 | const auto& shader_config = gpu.regs.shader_config[index]; |
| 256 | const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; | 275 | const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; |
| @@ -271,12 +290,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 271 | 290 | ||
| 272 | GLShader::MaxwellUniformData ubo{}; | 291 | GLShader::MaxwellUniformData ubo{}; |
| 273 | ubo.SetFromRegs(gpu, stage); | 292 | ubo.SetFromRegs(gpu, stage); |
| 274 | const GLintptr offset = | 293 | const auto [buffer, offset] = |
| 275 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); | 294 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); |
| 276 | 295 | ||
| 277 | // Bind the emulation info buffer | 296 | // Bind the emulation info buffer |
| 278 | bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, | 297 | bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo))); |
| 279 | static_cast<GLsizeiptr>(sizeof(ubo))); | ||
| 280 | 298 | ||
| 281 | Shader shader{shader_cache.GetStageProgram(program)}; | 299 | Shader shader{shader_cache.GetStageProgram(program)}; |
| 282 | 300 | ||
| @@ -321,9 +339,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 321 | base_bindings = next_bindings; | 339 | base_bindings = next_bindings; |
| 322 | } | 340 | } |
| 323 | 341 | ||
| 324 | bind_ubo_pushbuffer.Bind(); | ||
| 325 | bind_ssbo_pushbuffer.Bind(); | ||
| 326 | |||
| 327 | SyncClipEnabled(clip_distances); | 342 | SyncClipEnabled(clip_distances); |
| 328 | 343 | ||
| 329 | gpu.dirty_flags.shaders = false; | 344 | gpu.dirty_flags.shaders = false; |
| @@ -634,26 +649,46 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 634 | Maxwell::MaxShaderStage; | 649 | Maxwell::MaxShaderStage; |
| 635 | 650 | ||
| 636 | // Add space for at least 18 constant buffers | 651 | // Add space for at least 18 constant buffers |
| 637 | buffer_size += | 652 | buffer_size += Maxwell::MaxConstBuffers * |
| 638 | Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); | 653 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); |
| 639 | 654 | ||
| 640 | const bool invalidate = buffer_cache.Map(buffer_size); | 655 | // Prepare the vertex array. |
| 641 | if (invalidate) { | 656 | buffer_cache.Map(buffer_size); |
| 642 | // As all cached buffers are invalidated, we need to recheck their state. | ||
| 643 | gpu.dirty_flags.vertex_array.set(); | ||
| 644 | } | ||
| 645 | 657 | ||
| 658 | // Prepare vertex array format. | ||
| 646 | const GLuint vao = SetupVertexFormat(); | 659 | const GLuint vao = SetupVertexFormat(); |
| 660 | vertex_array_pushbuffer.Setup(vao); | ||
| 661 | |||
| 662 | // Upload vertex and index data. | ||
| 647 | SetupVertexBuffer(vao); | 663 | SetupVertexBuffer(vao); |
| 664 | const GLintptr index_buffer_offset = SetupIndexBuffer(); | ||
| 648 | 665 | ||
| 649 | DrawParameters params = SetupDraw(); | 666 | // Set up draw parameters. It will automatically choose which glDraw* method to use. |
| 667 | const DrawParameters params = SetupDraw(index_buffer_offset); | ||
| 668 | |||
| 669 | // Prepare packed bindings. | ||
| 670 | bind_ubo_pushbuffer.Setup(0); | ||
| 671 | bind_ssbo_pushbuffer.Setup(0); | ||
| 672 | |||
| 673 | // Set up the shaders and the resources they use. | ||
| 650 | texture_cache.GuardSamplers(true); | 674 | texture_cache.GuardSamplers(true); |
| 651 | SetupShaders(params.primitive_mode); | 675 | SetupShaders(params.primitive_mode); |
| 652 | texture_cache.GuardSamplers(false); | 676 | texture_cache.GuardSamplers(false); |
| 653 | 677 | ||
| 654 | ConfigureFramebuffers(state); | 678 | ConfigureFramebuffers(state); |
| 655 | 679 | ||
| 656 | buffer_cache.Unmap(); | 680 | // Signal the buffer cache that we are done uploading data. |
| 681 | const bool invalidate = buffer_cache.Unmap(); | ||
| 682 | |||
| 683 | // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL. | ||
| 684 | vertex_array_pushbuffer.Bind(); | ||
| 685 | bind_ubo_pushbuffer.Bind(); | ||
| 686 | bind_ssbo_pushbuffer.Bind(); | ||
| 687 | |||
| 688 | if (invalidate) { | ||
| 689 | // As all cached buffers are invalidated, we need to recheck their state. | ||
| 690 | gpu.dirty_flags.vertex_array.set(); | ||
| 691 | } | ||
| 657 | 692 | ||
| 658 | shader_program_manager->ApplyTo(state); | 693 | shader_program_manager->ApplyTo(state); |
| 659 | state.Apply(); | 694 | state.Apply(); |
| @@ -675,7 +710,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { | |||
| 675 | return; | 710 | return; |
| 676 | } | 711 | } |
| 677 | texture_cache.FlushRegion(addr, size); | 712 | texture_cache.FlushRegion(addr, size); |
| 678 | global_cache.FlushRegion(addr, size); | 713 | buffer_cache.FlushRegion(addr, size); |
| 679 | } | 714 | } |
| 680 | 715 | ||
| 681 | void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | 716 | void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { |
| @@ -685,7 +720,6 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | |||
| 685 | } | 720 | } |
| 686 | texture_cache.InvalidateRegion(addr, size); | 721 | texture_cache.InvalidateRegion(addr, size); |
| 687 | shader_cache.InvalidateRegion(addr, size); | 722 | shader_cache.InvalidateRegion(addr, size); |
| 688 | global_cache.InvalidateRegion(addr, size); | ||
| 689 | buffer_cache.InvalidateRegion(addr, size); | 723 | buffer_cache.InvalidateRegion(addr, size); |
| 690 | } | 724 | } |
| 691 | 725 | ||
| @@ -696,6 +730,10 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | |||
| 696 | InvalidateRegion(addr, size); | 730 | InvalidateRegion(addr, size); |
| 697 | } | 731 | } |
| 698 | 732 | ||
| 733 | void RasterizerOpenGL::TickFrame() { | ||
| 734 | buffer_cache.TickFrame(); | ||
| 735 | } | ||
| 736 | |||
| 699 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 737 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 700 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 738 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
| 701 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 739 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| @@ -739,11 +777,9 @@ void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::Sh | |||
| 739 | MICROPROFILE_SCOPE(OpenGL_UBO); | 777 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 740 | const auto stage_index = static_cast<std::size_t>(stage); | 778 | const auto stage_index = static_cast<std::size_t>(stage); |
| 741 | const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; | 779 | const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; |
| 742 | const auto& entries = shader->GetShaderEntries().const_buffers; | ||
| 743 | 780 | ||
| 744 | // Upload only the enabled buffers from the 16 constbuffers of each shader stage | 781 | // Upload only the enabled buffers from the 16 constbuffers of each shader stage |
| 745 | for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | 782 | for (const auto& entry : shader->GetShaderEntries().const_buffers) { |
| 746 | const auto& entry = entries[bindpoint]; | ||
| 747 | SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); | 783 | SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); |
| 748 | } | 784 | } |
| 749 | } | 785 | } |
| @@ -752,46 +788,34 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b | |||
| 752 | const GLShader::ConstBufferEntry& entry) { | 788 | const GLShader::ConstBufferEntry& entry) { |
| 753 | if (!buffer.enabled) { | 789 | if (!buffer.enabled) { |
| 754 | // Set values to zero to unbind buffers | 790 | // Set values to zero to unbind buffers |
| 755 | bind_ubo_pushbuffer.Push(0, 0, 0); | 791 | bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); |
| 756 | return; | 792 | return; |
| 757 | } | 793 | } |
| 758 | 794 | ||
| 759 | std::size_t size; | ||
| 760 | if (entry.IsIndirect()) { | ||
| 761 | // Buffer is accessed indirectly, so upload the entire thing | ||
| 762 | size = buffer.size; | ||
| 763 | |||
| 764 | if (size > MaxConstbufferSize) { | ||
| 765 | LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size, | ||
| 766 | MaxConstbufferSize); | ||
| 767 | size = MaxConstbufferSize; | ||
| 768 | } | ||
| 769 | } else { | ||
| 770 | // Buffer is accessed directly, upload just what we use | ||
| 771 | size = entry.GetSize(); | ||
| 772 | } | ||
| 773 | |||
| 774 | // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 | 795 | // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 |
| 775 | // UBO alignment requirements. | 796 | // UBO alignment requirements. |
| 776 | size = Common::AlignUp(size, sizeof(GLvec4)); | 797 | const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); |
| 777 | ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big"); | ||
| 778 | 798 | ||
| 779 | const std::size_t alignment = device.GetUniformBufferAlignment(); | 799 | const auto alignment = device.GetUniformBufferAlignment(); |
| 780 | const GLintptr offset = buffer_cache.UploadMemory(buffer.address, size, alignment); | 800 | const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment); |
| 781 | bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, size); | 801 | bind_ubo_pushbuffer.Push(cbuf, offset, size); |
| 782 | } | 802 | } |
| 783 | 803 | ||
| 784 | void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | 804 | void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, |
| 785 | const Shader& shader) { | 805 | const Shader& shader) { |
| 786 | const auto& entries = shader->GetShaderEntries().global_memory_entries; | 806 | auto& gpu{system.GPU()}; |
| 787 | for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | 807 | auto& memory_manager{gpu.MemoryManager()}; |
| 788 | const auto& entry{entries[bindpoint]}; | 808 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; |
| 789 | const auto& region{global_cache.GetGlobalRegion(entry, stage)}; | 809 | const auto alignment{device.GetShaderStorageBufferAlignment()}; |
| 790 | if (entry.IsWritten()) { | 810 | |
| 791 | region->MarkAsModified(true, global_cache); | 811 | for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { |
| 792 | } | 812 | const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; |
| 793 | bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0, | 813 | const auto actual_addr{memory_manager.Read<u64>(addr)}; |
| 794 | static_cast<GLsizeiptr>(region->GetSizeInBytes())); | 814 | const auto size{memory_manager.Read<u32>(addr + 8)}; |
| 815 | |||
| 816 | const auto [ssbo, buffer_offset] = | ||
| 817 | buffer_cache.UploadMemory(actual_addr, size, alignment, true, entry.IsWritten()); | ||
| 818 | bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); | ||
| 795 | } | 819 | } |
| 796 | } | 820 | } |
| 797 | 821 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d238c1257..40b571d58 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -24,7 +24,6 @@ | |||
| 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_device.h" | 25 | #include "video_core/renderer_opengl/gl_device.h" |
| 26 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" | 26 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" |
| 27 | #include "video_core/renderer_opengl/gl_global_cache.h" | ||
| 28 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 27 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 29 | #include "video_core/renderer_opengl/gl_sampler_cache.h" | 28 | #include "video_core/renderer_opengl/gl_sampler_cache.h" |
| 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 29 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| @@ -63,6 +62,7 @@ public: | |||
| 63 | void FlushRegion(CacheAddr addr, u64 size) override; | 62 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 64 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 63 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 65 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 64 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 65 | void TickFrame() override; | ||
| 66 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 66 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 67 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 67 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
| 68 | const Tegra::Engines::Fermi2D::Config& copy_config) override; | 68 | const Tegra::Engines::Fermi2D::Config& copy_config) override; |
| @@ -73,11 +73,6 @@ public: | |||
| 73 | void LoadDiskResources(const std::atomic_bool& stop_loading, | 73 | void LoadDiskResources(const std::atomic_bool& stop_loading, |
| 74 | const VideoCore::DiskResourceLoadCallback& callback) override; | 74 | const VideoCore::DiskResourceLoadCallback& callback) override; |
| 75 | 75 | ||
| 76 | /// Maximum supported size that a constbuffer can have in bytes. | ||
| 77 | static constexpr std::size_t MaxConstbufferSize = 0x10000; | ||
| 78 | static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, | ||
| 79 | "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); | ||
| 80 | |||
| 81 | private: | 76 | private: |
| 82 | struct FramebufferConfigState { | 77 | struct FramebufferConfigState { |
| 83 | bool using_color_fb{}; | 78 | bool using_color_fb{}; |
| @@ -191,7 +186,6 @@ private: | |||
| 191 | 186 | ||
| 192 | TextureCacheOpenGL texture_cache; | 187 | TextureCacheOpenGL texture_cache; |
| 193 | ShaderCacheOpenGL shader_cache; | 188 | ShaderCacheOpenGL shader_cache; |
| 194 | GlobalRegionCacheOpenGL global_cache; | ||
| 195 | SamplerCacheOpenGL sampler_cache; | 189 | SamplerCacheOpenGL sampler_cache; |
| 196 | FramebufferCacheOpenGL framebuffer_cache; | 190 | FramebufferCacheOpenGL framebuffer_cache; |
| 197 | 191 | ||
| @@ -210,6 +204,7 @@ private: | |||
| 210 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | 204 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; |
| 211 | OGLBufferCache buffer_cache; | 205 | OGLBufferCache buffer_cache; |
| 212 | 206 | ||
| 207 | VertexArrayPushBuffer vertex_array_pushbuffer; | ||
| 213 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; | 208 | BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; |
| 214 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; | 209 | BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; |
| 215 | 210 | ||
| @@ -222,7 +217,9 @@ private: | |||
| 222 | 217 | ||
| 223 | void SetupVertexBuffer(GLuint vao); | 218 | void SetupVertexBuffer(GLuint vao); |
| 224 | 219 | ||
| 225 | DrawParameters SetupDraw(); | 220 | GLintptr SetupIndexBuffer(); |
| 221 | |||
| 222 | DrawParameters SetupDraw(GLintptr index_buffer_offset); | ||
| 226 | 223 | ||
| 227 | void SetupShaders(GLenum primitive_mode); | 224 | void SetupShaders(GLenum primitive_mode); |
| 228 | 225 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5d76ee12d..32dd9eae7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -190,8 +190,11 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 190 | const auto texture_buffer_usage{variant.texture_buffer_usage}; | 190 | const auto texture_buffer_usage{variant.texture_buffer_usage}; |
| 191 | 191 | ||
| 192 | std::string source = "#version 430 core\n" | 192 | std::string source = "#version 430 core\n" |
| 193 | "#extension GL_ARB_separate_shader_objects : enable\n\n"; | 193 | "#extension GL_ARB_separate_shader_objects : enable\n"; |
| 194 | source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | 194 | if (entries.shader_viewport_layer_array) { |
| 195 | source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; | ||
| 196 | } | ||
| 197 | source += fmt::format("\n#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | ||
| 195 | 198 | ||
| 196 | for (const auto& cbuf : entries.const_buffers) { | 199 | for (const auto& cbuf : entries.const_buffers) { |
| 197 | source += | 200 | source += |
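The specializer only injects GL_ARB_shader_viewport_layer_array into the generated GLSL when the decompiler flagged the entry as needing it (a vertex shader writing gl_Layer or gl_ViewportIndex). For such a shader the emitted preamble roughly takes the shape below; the binding value 0 is assumed here for illustration, the real value comes from base_bindings.cbuf:

    // Rough shape of the generated preamble for an affected vertex shader (illustrative only).
    const char* const preamble =
        "#version 430 core\n"
        "#extension GL_ARB_separate_shader_objects : enable\n"
        "#extension GL_ARB_shader_viewport_layer_array : enable\n"
        "\n#define EMULATION_UBO_BINDING 0\n";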
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index bfc975a04..119073776 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "common/alignment.h" | 14 | #include "common/alignment.h" |
| 15 | #include "common/assert.h" | 15 | #include "common/assert.h" |
| 16 | #include "common/common_types.h" | 16 | #include "common/common_types.h" |
| 17 | #include "common/logging/log.h" | ||
| 17 | #include "video_core/engines/maxwell_3d.h" | 18 | #include "video_core/engines/maxwell_3d.h" |
| 18 | #include "video_core/renderer_opengl/gl_device.h" | 19 | #include "video_core/renderer_opengl/gl_device.h" |
| 19 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 20 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| @@ -46,7 +47,7 @@ using TextureArgument = std::pair<Type, Node>; | |||
| 46 | using TextureIR = std::variant<TextureAoffi, TextureArgument>; | 47 | using TextureIR = std::variant<TextureAoffi, TextureArgument>; |
| 47 | 48 | ||
| 48 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = | 49 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = |
| 49 | static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); | 50 | static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); |
| 50 | 51 | ||
| 51 | class ShaderWriter { | 52 | class ShaderWriter { |
| 52 | public: | 53 | public: |
| @@ -246,6 +247,8 @@ public: | |||
| 246 | usage.is_read, usage.is_written); | 247 | usage.is_read, usage.is_written); |
| 247 | } | 248 | } |
| 248 | entries.clip_distances = ir.GetClipDistances(); | 249 | entries.clip_distances = ir.GetClipDistances(); |
| 250 | entries.shader_viewport_layer_array = | ||
| 251 | stage == ShaderStage::Vertex && (ir.UsesLayer() || ir.UsesViewportIndex()); | ||
| 249 | entries.shader_length = ir.GetLength(); | 252 | entries.shader_length = ir.GetLength(); |
| 250 | return entries; | 253 | return entries; |
| 251 | } | 254 | } |
| @@ -282,22 +285,35 @@ private: | |||
| 282 | } | 285 | } |
| 283 | 286 | ||
| 284 | void DeclareVertexRedeclarations() { | 287 | void DeclareVertexRedeclarations() { |
| 285 | bool clip_distances_declared = false; | ||
| 286 | |||
| 287 | code.AddLine("out gl_PerVertex {{"); | 288 | code.AddLine("out gl_PerVertex {{"); |
| 288 | ++code.scope; | 289 | ++code.scope; |
| 289 | 290 | ||
| 290 | code.AddLine("vec4 gl_Position;"); | 291 | code.AddLine("vec4 gl_Position;"); |
| 291 | 292 | ||
| 292 | for (const auto o : ir.GetOutputAttributes()) { | 293 | for (const auto attribute : ir.GetOutputAttributes()) { |
| 293 | if (o == Attribute::Index::PointSize) | 294 | if (attribute == Attribute::Index::ClipDistances0123 || |
| 294 | code.AddLine("float gl_PointSize;"); | 295 | attribute == Attribute::Index::ClipDistances4567) { |
| 295 | if (!clip_distances_declared && (o == Attribute::Index::ClipDistances0123 || | ||
| 296 | o == Attribute::Index::ClipDistances4567)) { | ||
| 297 | code.AddLine("float gl_ClipDistance[];"); | 296 | code.AddLine("float gl_ClipDistance[];"); |
| 298 | clip_distances_declared = true; | 297 | break; |
| 299 | } | 298 | } |
| 300 | } | 299 | } |
| 300 | if (stage != ShaderStage::Vertex || device.HasVertexViewportLayer()) { | ||
| 301 | if (ir.UsesLayer()) { | ||
| 302 | code.AddLine("int gl_Layer;"); | ||
| 303 | } | ||
| 304 | if (ir.UsesViewportIndex()) { | ||
| 305 | code.AddLine("int gl_ViewportIndex;"); | ||
| 306 | } | ||
| 307 | } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderStage::Vertex && | ||
| 308 | !device.HasVertexViewportLayer()) { | ||
| 309 | LOG_ERROR( | ||
| 310 | Render_OpenGL, | ||
| 311 | "GL_ARB_shader_viewport_layer_array is not available and its required by a shader"); | ||
| 312 | } | ||
| 313 | |||
| 314 | if (ir.UsesPointSize()) { | ||
| 315 | code.AddLine("float gl_PointSize;"); | ||
| 316 | } | ||
| 301 | 317 | ||
| 302 | --code.scope; | 318 | --code.scope; |
| 303 | code.AddLine("}};"); | 319 | code.AddLine("}};"); |
| @@ -805,6 +821,45 @@ private: | |||
| 805 | return CastOperand(VisitOperand(operation, operand_index), type); | 821 | return CastOperand(VisitOperand(operation, operand_index), type); |
| 806 | } | 822 | } |
| 807 | 823 | ||
| 824 | std::optional<std::pair<std::string, bool>> GetOutputAttribute(const AbufNode* abuf) { | ||
| 825 | switch (const auto attribute = abuf->GetIndex()) { | ||
| 826 | case Attribute::Index::Position: | ||
| 827 | return std::make_pair("gl_Position"s + GetSwizzle(abuf->GetElement()), false); | ||
| 828 | case Attribute::Index::LayerViewportPointSize: | ||
| 829 | switch (abuf->GetElement()) { | ||
| 830 | case 0: | ||
| 831 | UNIMPLEMENTED(); | ||
| 832 | return {}; | ||
| 833 | case 1: | ||
| 834 | if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) { | ||
| 835 | return {}; | ||
| 836 | } | ||
| 837 | return std::make_pair("gl_Layer", true); | ||
| 838 | case 2: | ||
| 839 | if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) { | ||
| 840 | return {}; | ||
| 841 | } | ||
| 842 | return std::make_pair("gl_ViewportIndex", true); | ||
| 843 | case 3: | ||
| 844 | UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader"); | ||
| 845 | return std::make_pair("gl_PointSize", false); | ||
| 846 | } | ||
| 847 | return {}; | ||
| 848 | case Attribute::Index::ClipDistances0123: | ||
| 849 | return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), false); | ||
| 850 | case Attribute::Index::ClipDistances4567: | ||
| 851 | return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), | ||
| 852 | false); | ||
| 853 | default: | ||
| 854 | if (IsGenericAttribute(attribute)) { | ||
| 855 | return std::make_pair( | ||
| 856 | GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), false); | ||
| 857 | } | ||
| 858 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); | ||
| 859 | return {}; | ||
| 860 | } | ||
| 861 | } | ||
| 862 | |||
| 808 | std::string CastOperand(const std::string& value, Type type) const { | 863 | std::string CastOperand(const std::string& value, Type type) const { |
| 809 | switch (type) { | 864 | switch (type) { |
| 810 | case Type::Bool: | 865 | case Type::Bool: |
| @@ -1001,6 +1056,8 @@ private: | |||
| 1001 | const Node& src = operation[1]; | 1056 | const Node& src = operation[1]; |
| 1002 | 1057 | ||
| 1003 | std::string target; | 1058 | std::string target; |
| 1059 | bool is_integer = false; | ||
| 1060 | |||
| 1004 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | 1061 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { |
| 1005 | if (gpr->GetIndex() == Register::ZeroIndex) { | 1062 | if (gpr->GetIndex() == Register::ZeroIndex) { |
| 1006 | // Writing to Register::ZeroIndex is a no op | 1063 | // Writing to Register::ZeroIndex is a no op |
| @@ -1009,26 +1066,12 @@ private: | |||
| 1009 | target = GetRegister(gpr->GetIndex()); | 1066 | target = GetRegister(gpr->GetIndex()); |
| 1010 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { | 1067 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { |
| 1011 | UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); | 1068 | UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); |
| 1012 | 1069 | const auto result = GetOutputAttribute(abuf); | |
| 1013 | target = [&]() -> std::string { | 1070 | if (!result) { |
| 1014 | switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) { | 1071 | return {}; |
| 1015 | case Attribute::Index::Position: | 1072 | } |
| 1016 | return "gl_Position"s + GetSwizzle(abuf->GetElement()); | 1073 | target = result->first; |
| 1017 | case Attribute::Index::PointSize: | 1074 | is_integer = result->second; |
| 1018 | return "gl_PointSize"; | ||
| 1019 | case Attribute::Index::ClipDistances0123: | ||
| 1020 | return fmt::format("gl_ClipDistance[{}]", abuf->GetElement()); | ||
| 1021 | case Attribute::Index::ClipDistances4567: | ||
| 1022 | return fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4); | ||
| 1023 | default: | ||
| 1024 | if (IsGenericAttribute(attribute)) { | ||
| 1025 | return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()); | ||
| 1026 | } | ||
| 1027 | UNIMPLEMENTED_MSG("Unhandled output attribute: {}", | ||
| 1028 | static_cast<u32>(attribute)); | ||
| 1029 | return "0"; | ||
| 1030 | } | ||
| 1031 | }(); | ||
| 1032 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | 1075 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { |
| 1033 | target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); | 1076 | target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); |
| 1034 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | 1077 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { |
| @@ -1040,7 +1083,11 @@ private: | |||
| 1040 | UNREACHABLE_MSG("Assign called without a proper target"); | 1083 | UNREACHABLE_MSG("Assign called without a proper target"); |
| 1041 | } | 1084 | } |
| 1042 | 1085 | ||
| 1043 | code.AddLine("{} = {};", target, Visit(src)); | 1086 | if (is_integer) { |
| 1087 | code.AddLine("{} = ftoi({});", target, Visit(src)); | ||
| 1088 | } else { | ||
| 1089 | code.AddLine("{} = {};", target, Visit(src)); | ||
| 1090 | } | ||
| 1044 | return {}; | 1091 | return {}; |
| 1045 | } | 1092 | } |
| 1046 | 1093 | ||
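The store path now distinguishes integer built-ins: GetOutputAttribute returns the target name plus a flag, and Assign wraps the value in ftoi() when the flag is set, since gl_Layer and gl_ViewportIndex are integers while the IR carries float bit patterns. A condensed sketch of that decision; the helper names are illustrative and ftoi is assumed to be the decompiler's float-to-int reinterpret helper in the emitted GLSL:

    #include <string>

    struct OutputTarget {
        std::string name; // e.g. "gl_Layer" or "gl_Position.x"
        bool is_integer;  // true for integer built-ins such as gl_Layer / gl_ViewportIndex
    };

    // Emits the GLSL assignment for a store, reinterpreting the float bit pattern
    // when the destination is an integer built-in.
    std::string EmitStore(const OutputTarget& target, const std::string& value) {
        if (target.is_integer) {
            return target.name + " = ftoi(" + value + ");";
        }
        return target.name + " = " + value + ";";
    }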
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 14d11c7fc..02586736d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -78,6 +78,7 @@ struct ShaderEntries { | |||
| 78 | std::vector<ImageEntry> images; | 78 | std::vector<ImageEntry> images; |
| 79 | std::vector<GlobalMemoryEntry> global_memory_entries; | 79 | std::vector<GlobalMemoryEntry> global_memory_entries; |
| 80 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 80 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 81 | bool shader_viewport_layer_array{}; | ||
| 81 | std::size_t shader_length{}; | 82 | std::size_t shader_length{}; |
| 82 | }; | 83 | }; |
| 83 | 84 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 10688397b..7893d1e26 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -373,6 +373,12 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn | |||
| 373 | } | 373 | } |
| 374 | } | 374 | } |
| 375 | 375 | ||
| 376 | bool shader_viewport_layer_array{}; | ||
| 377 | if (!LoadObjectFromPrecompiled(shader_viewport_layer_array)) { | ||
| 378 | return {}; | ||
| 379 | } | ||
| 380 | entry.entries.shader_viewport_layer_array = shader_viewport_layer_array; | ||
| 381 | |||
| 376 | u64 shader_length{}; | 382 | u64 shader_length{}; |
| 377 | if (!LoadObjectFromPrecompiled(shader_length)) { | 383 | if (!LoadObjectFromPrecompiled(shader_length)) { |
| 378 | return {}; | 384 | return {}; |
| @@ -445,6 +451,10 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: | |||
| 445 | } | 451 | } |
| 446 | } | 452 | } |
| 447 | 453 | ||
| 454 | if (!SaveObjectToPrecompiled(entries.shader_viewport_layer_array)) { | ||
| 455 | return false; | ||
| 456 | } | ||
| 457 | |||
| 448 | if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { | 458 | if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { |
| 449 | return false; | 459 | return false; |
| 450 | } | 460 | } |
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index d86e137ac..0eae98afe 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -6,8 +6,11 @@ | |||
| 6 | #include <glad/glad.h> | 6 | #include <glad/glad.h> |
| 7 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 8 | #include "common/logging/log.h" | 8 | #include "common/logging/log.h" |
| 9 | #include "common/microprofile.h" | ||
| 9 | #include "video_core/renderer_opengl/gl_state.h" | 10 | #include "video_core/renderer_opengl/gl_state.h" |
| 10 | 11 | ||
| 12 | MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128)); | ||
| 13 | |||
| 11 | namespace OpenGL { | 14 | namespace OpenGL { |
| 12 | 15 | ||
| 13 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 16 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| @@ -524,6 +527,7 @@ void OpenGLState::ApplySamplers() const { | |||
| 524 | } | 527 | } |
| 525 | 528 | ||
| 526 | void OpenGLState::Apply() const { | 529 | void OpenGLState::Apply() const { |
| 530 | MICROPROFILE_SCOPE(OpenGL_State); | ||
| 527 | ApplyFramebufferState(); | 531 | ApplyFramebufferState(); |
| 528 | ApplyVertexArrayState(); | 532 | ApplyVertexArrayState(); |
| 529 | ApplyShaderProgram(); | 533 | ApplyShaderProgram(); |
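OpenGLState::Apply is now measured with MicroProfile, using the same define-once, scope-per-call pattern already used for texture uploads in this renderer. A minimal sketch of that pattern with an illustrative timer name:

    #include "common/microprofile.h"

    // Declare a timer token once at namespace scope...
    MICROPROFILE_DEFINE(OpenGL_Example, "OpenGL", "Example Work", MP_RGB(128, 128, 192));

    void DoExampleWork() {
        // ...then open a scoped sample; time spent until the end of the scope
        // is attributed to the token above.
        MICROPROFILE_SCOPE(OpenGL_Example);
        // work being measured
    }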
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 08ae1a429..b1f6bc7c2 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -31,6 +31,8 @@ using VideoCore::Surface::SurfaceType; | |||
| 31 | 31 | ||
| 32 | MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); | 32 | MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); |
| 33 | MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); | 33 | MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); |
| 34 | MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", | ||
| 35 | MP_RGB(128, 192, 128)); | ||
| 34 | 36 | ||
| 35 | namespace { | 37 | namespace { |
| 36 | 38 | ||
| @@ -535,6 +537,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, | |||
| 535 | } | 537 | } |
| 536 | 538 | ||
| 537 | void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { | 539 | void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { |
| 540 | MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy); | ||
| 538 | const auto& src_params = src_surface->GetSurfaceParams(); | 541 | const auto& src_params = src_surface->GetSurfaceParams(); |
| 539 | const auto& dst_params = dst_surface->GetSurfaceParams(); | 542 | const auto& dst_params = dst_surface->GetSurfaceParams(); |
| 540 | UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); | 543 | UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index b142521ec..9ecdddb0d 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -101,7 +101,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst | |||
| 101 | 101 | ||
| 102 | RendererOpenGL::~RendererOpenGL() = default; | 102 | RendererOpenGL::~RendererOpenGL() = default; |
| 103 | 103 | ||
| 104 | /// Swap buffers (render frame) | ||
| 105 | void RendererOpenGL::SwapBuffers( | 104 | void RendererOpenGL::SwapBuffers( |
| 106 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | 105 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { |
| 107 | 106 | ||
| @@ -130,6 +129,8 @@ void RendererOpenGL::SwapBuffers( | |||
| 130 | 129 | ||
| 131 | DrawScreen(render_window.GetFramebufferLayout()); | 130 | DrawScreen(render_window.GetFramebufferLayout()); |
| 132 | 131 | ||
| 132 | rasterizer->TickFrame(); | ||
| 133 | |||
| 133 | render_window.SwapBuffers(); | 134 | render_window.SwapBuffers(); |
| 134 | } | 135 | } |
| 135 | 136 | ||
| @@ -262,7 +263,6 @@ void RendererOpenGL::CreateRasterizer() { | |||
| 262 | if (rasterizer) { | 263 | if (rasterizer) { |
| 263 | return; | 264 | return; |
| 264 | } | 265 | } |
| 265 | // Initialize sRGB Usage | ||
| 266 | OpenGLState::ClearsRGBUsed(); | 266 | OpenGLState::ClearsRGBUsed(); |
| 267 | rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); | 267 | rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); |
| 268 | } | 268 | } |
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index 68c36988d..c504a2c1a 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp | |||
| @@ -13,29 +13,67 @@ | |||
| 13 | 13 | ||
| 14 | namespace OpenGL { | 14 | namespace OpenGL { |
| 15 | 15 | ||
| 16 | VertexArrayPushBuffer::VertexArrayPushBuffer() = default; | ||
| 17 | |||
| 18 | VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; | ||
| 19 | |||
| 20 | void VertexArrayPushBuffer::Setup(GLuint vao_) { | ||
| 21 | vao = vao_; | ||
| 22 | index_buffer = nullptr; | ||
| 23 | vertex_buffers.clear(); | ||
| 24 | } | ||
| 25 | |||
| 26 | void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) { | ||
| 27 | index_buffer = buffer; | ||
| 28 | } | ||
| 29 | |||
| 30 | void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer, | ||
| 31 | GLintptr offset, GLsizei stride) { | ||
| 32 | vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride}); | ||
| 33 | } | ||
| 34 | |||
| 35 | void VertexArrayPushBuffer::Bind() { | ||
| 36 | if (index_buffer) { | ||
| 37 | glVertexArrayElementBuffer(vao, *index_buffer); | ||
| 38 | } | ||
| 39 | |||
| 40 | // TODO(Rodrigo): Find a way to ARB_multi_bind this | ||
| 41 | for (const auto& entry : vertex_buffers) { | ||
| 42 | glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset, | ||
| 43 | entry.stride); | ||
| 44 | } | ||
| 45 | } | ||
| 46 | |||
| 16 | BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} | 47 | BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} |
| 17 | 48 | ||
| 18 | BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; | 49 | BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; |
| 19 | 50 | ||
| 20 | void BindBuffersRangePushBuffer::Setup(GLuint first_) { | 51 | void BindBuffersRangePushBuffer::Setup(GLuint first_) { |
| 21 | first = first_; | 52 | first = first_; |
| 22 | buffers.clear(); | 53 | buffer_pointers.clear(); |
| 23 | offsets.clear(); | 54 | offsets.clear(); |
| 24 | sizes.clear(); | 55 | sizes.clear(); |
| 25 | } | 56 | } |
| 26 | 57 | ||
| 27 | void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) { | 58 | void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) { |
| 28 | buffers.push_back(buffer); | 59 | buffer_pointers.push_back(buffer); |
| 29 | offsets.push_back(offset); | 60 | offsets.push_back(offset); |
| 30 | sizes.push_back(size); | 61 | sizes.push_back(size); |
| 31 | } | 62 | } |
| 32 | 63 | ||
| 33 | void BindBuffersRangePushBuffer::Bind() const { | 64 | void BindBuffersRangePushBuffer::Bind() { |
| 34 | const std::size_t count{buffers.size()}; | 65 | // Ensure sizes are valid. |
| 66 | const std::size_t count{buffer_pointers.size()}; | ||
| 35 | DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); | 67 | DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); |
| 36 | if (count == 0) { | 68 | if (count == 0) { |
| 37 | return; | 69 | return; |
| 38 | } | 70 | } |
| 71 | |||
| 72 | // Dereference buffers. | ||
| 73 | buffers.resize(count); | ||
| 74 | std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(), | ||
| 75 | [](const GLuint* pointer) { return *pointer; }); | ||
| 76 | |||
| 39 | glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), | 77 | glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(), |
| 40 | sizes.data()); | 78 | sizes.data()); |
| 41 | } | 79 | } |
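BindBuffersRangePushBuffer now records pointers to buffer handles and only dereferences them in Bind, presumably because the handle behind a pushed binding is not final until the buffer cache has been unmapped, so capturing the GLuint by value at push time could bind a stale buffer. A minimal self-contained sketch of that deferred-dereference idea, assuming a current GL 4.4+ context for glBindBuffersRange:

    #include <vector>
    #include <glad/glad.h>

    // Bindings are recorded with a pointer to the handle; the raw GLuint is read only
    // at bind time, after the handles have settled.
    struct DeferredBinding {
        const GLuint* buffer;
        GLintptr offset;
        GLsizeiptr size;
    };

    void BindRanges(GLenum target, GLuint first, const std::vector<DeferredBinding>& bindings) {
        std::vector<GLuint> buffers;
        std::vector<GLintptr> offsets;
        std::vector<GLsizeiptr> sizes;
        for (const DeferredBinding& binding : bindings) {
            buffers.push_back(*binding.buffer);
            offsets.push_back(binding.offset);
            sizes.push_back(binding.size);
        }
        glBindBuffersRange(target, first, static_cast<GLsizei>(buffers.size()), buffers.data(),
                           offsets.data(), sizes.data());
    }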
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 4a752f3b4..6c2b45546 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h | |||
| @@ -11,20 +11,49 @@ | |||
| 11 | 11 | ||
| 12 | namespace OpenGL { | 12 | namespace OpenGL { |
| 13 | 13 | ||
| 14 | class BindBuffersRangePushBuffer { | 14 | class VertexArrayPushBuffer final { |
| 15 | public: | 15 | public: |
| 16 | BindBuffersRangePushBuffer(GLenum target); | 16 | explicit VertexArrayPushBuffer(); |
| 17 | ~VertexArrayPushBuffer(); | ||
| 18 | |||
| 19 | void Setup(GLuint vao_); | ||
| 20 | |||
| 21 | void SetIndexBuffer(const GLuint* buffer); | ||
| 22 | |||
| 23 | void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset, | ||
| 24 | GLsizei stride); | ||
| 25 | |||
| 26 | void Bind(); | ||
| 27 | |||
| 28 | private: | ||
| 29 | struct Entry { | ||
| 30 | GLuint binding_index{}; | ||
| 31 | const GLuint* buffer{}; | ||
| 32 | GLintptr offset{}; | ||
| 33 | GLsizei stride{}; | ||
| 34 | }; | ||
| 35 | |||
| 36 | GLuint vao{}; | ||
| 37 | const GLuint* index_buffer{}; | ||
| 38 | std::vector<Entry> vertex_buffers; | ||
| 39 | }; | ||
| 40 | |||
| 41 | class BindBuffersRangePushBuffer final { | ||
| 42 | public: | ||
| 43 | explicit BindBuffersRangePushBuffer(GLenum target); | ||
| 17 | ~BindBuffersRangePushBuffer(); | 44 | ~BindBuffersRangePushBuffer(); |
| 18 | 45 | ||
| 19 | void Setup(GLuint first_); | 46 | void Setup(GLuint first_); |
| 20 | 47 | ||
| 21 | void Push(GLuint buffer, GLintptr offset, GLsizeiptr size); | 48 | void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size); |
| 22 | 49 | ||
| 23 | void Bind() const; | 50 | void Bind(); |
| 24 | 51 | ||
| 25 | private: | 52 | private: |
| 26 | GLenum target; | 53 | GLenum target{}; |
| 27 | GLuint first; | 54 | GLuint first{}; |
| 55 | std::vector<const GLuint*> buffer_pointers; | ||
| 56 | |||
| 28 | std::vector<GLuint> buffers; | 57 | std::vector<GLuint> buffers; |
| 29 | std::vector<GLintptr> offsets; | 58 | std::vector<GLintptr> offsets; |
| 30 | std::vector<GLsizeiptr> sizes; | 59 | std::vector<GLsizeiptr> sizes; |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 1bb04607b..9b2d8e987 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -430,20 +430,17 @@ private: | |||
| 430 | instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input, | 430 | instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input, |
| 431 | t_in_uint, "instance_index"); | 431 | t_in_uint, "instance_index"); |
| 432 | 432 | ||
| 433 | bool is_point_size_declared = false; | ||
| 434 | bool is_clip_distances_declared = false; | 433 | bool is_clip_distances_declared = false; |
| 435 | for (const auto index : ir.GetOutputAttributes()) { | 434 | for (const auto index : ir.GetOutputAttributes()) { |
| 436 | if (index == Attribute::Index::PointSize) { | 435 | if (index == Attribute::Index::ClipDistances0123 || |
| 437 | is_point_size_declared = true; | 436 | index == Attribute::Index::ClipDistances4567) { |
| 438 | } else if (index == Attribute::Index::ClipDistances0123 || | ||
| 439 | index == Attribute::Index::ClipDistances4567) { | ||
| 440 | is_clip_distances_declared = true; | 437 | is_clip_distances_declared = true; |
| 441 | } | 438 | } |
| 442 | } | 439 | } |
| 443 | 440 | ||
| 444 | std::vector<Id> members; | 441 | std::vector<Id> members; |
| 445 | members.push_back(t_float4); | 442 | members.push_back(t_float4); |
| 446 | if (is_point_size_declared) { | 443 | if (ir.UsesPointSize()) { |
| 447 | members.push_back(t_float); | 444 | members.push_back(t_float); |
| 448 | } | 445 | } |
| 449 | if (is_clip_distances_declared) { | 446 | if (is_clip_distances_declared) { |
| @@ -466,7 +463,7 @@ private: | |||
| 466 | 463 | ||
| 467 | position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true); | 464 | position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true); |
| 468 | point_size_index = | 465 | point_size_index = |
| 469 | MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", is_point_size_declared); | 466 | MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", ir.UsesPointSize()); |
| 470 | clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances", | 467 | clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances", |
| 471 | is_clip_distances_declared); | 468 | is_clip_distances_declared); |
| 472 | 469 | ||
| @@ -712,7 +709,8 @@ private: | |||
| 712 | case Attribute::Index::Position: | 709 | case Attribute::Index::Position: |
| 713 | return AccessElement(t_out_float, per_vertex, position_index, | 710 | return AccessElement(t_out_float, per_vertex, position_index, |
| 714 | abuf->GetElement()); | 711 | abuf->GetElement()); |
| 715 | case Attribute::Index::PointSize: | 712 | case Attribute::Index::LayerViewportPointSize: |
| 713 | UNIMPLEMENTED_IF(abuf->GetElement() != 3); | ||
| 716 | return AccessElement(t_out_float, per_vertex, point_size_index); | 714 | return AccessElement(t_out_float, per_vertex, point_size_index); |
| 717 | case Attribute::Index::ClipDistances0123: | 715 | case Attribute::Index::ClipDistances0123: |
| 718 | return AccessElement(t_out_float, per_vertex, clip_distances_index, | 716 | return AccessElement(t_out_float, per_vertex, clip_distances_index, |
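The per-vertex block is assembled member by member: position is always first, and point size and clip distances are appended only when the IR reports they are used. A rough sketch of that conditional layout, under the assumption that member indices simply follow declaration order (struct and helper names are made up for illustration):

```cpp
#include <cstdint>

struct PerVertexLayout {
    std::int32_t position_index = -1;
    std::int32_t point_size_index = -1;
    std::int32_t clip_distances_index = -1;
};

PerVertexLayout BuildLayout(bool uses_point_size, bool uses_clip_distances) {
    PerVertexLayout layout;
    std::int32_t next_member = 0;
    layout.position_index = next_member++;
    if (uses_point_size) {
        layout.point_size_index = next_member++;
    }
    if (uses_clip_distances) {
        layout.clip_distances_index = next_member++;
    }
    return layout;
}
```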
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 24f022cc0..77151a24b 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp | |||
| @@ -95,12 +95,8 @@ const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::Image | |||
| 95 | const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, | 95 | const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, |
| 96 | Tegra::Shader::ImageType type) { | 96 | Tegra::Shader::ImageType type) { |
| 97 | const Node image_register{GetRegister(reg)}; | 97 | const Node image_register{GetRegister(reg)}; |
| 98 | const Node base_image{ | 98 | const auto [base_image, cbuf_index, cbuf_offset]{ |
| 99 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; | 99 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; |
| 100 | const auto cbuf{std::get_if<CbufNode>(&*base_image)}; | ||
| 101 | const auto cbuf_offset_imm{std::get_if<ImmediateNode>(&*cbuf->GetOffset())}; | ||
| 102 | const auto cbuf_offset{cbuf_offset_imm->GetValue()}; | ||
| 103 | const auto cbuf_index{cbuf->GetIndex()}; | ||
| 104 | const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; | 100 | const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; |
| 105 | 101 | ||
| 106 | // If this image has already been used, return the existing mapping. | 102 | // If this image has already been used, return the existing mapping. |
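The 64-bit lookup key packs the constant-buffer index into the upper half and the byte offset into the lower half, the same scheme used for bindless samplers below. A self-contained sketch of that packing (the helper name is hypothetical):

```cpp
#include <cstdint>

// Upper 32 bits: constant-buffer index; lower 32 bits: offset within it.
constexpr std::uint64_t MakeCbufKey(std::uint32_t index, std::uint32_t offset) {
    return (static_cast<std::uint64_t>(index) << 32) | offset;
}

static_assert(MakeCbufKey(1, 0x10) == 0x0000'0001'0000'0010ULL);
```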
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 80fc0ccfc..ab207a33b 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -297,18 +297,13 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeB | |||
| 297 | const auto addr_register{GetRegister(instr.gmem.gpr)}; | 297 | const auto addr_register{GetRegister(instr.gmem.gpr)}; |
| 298 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; | 298 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; |
| 299 | 299 | ||
| 300 | const Node base_address{ | 300 | const auto [base_address, index, offset] = |
| 301 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; | 301 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); |
| 302 | const auto cbuf = std::get_if<CbufNode>(&*base_address); | 302 | ASSERT(base_address != nullptr); |
| 303 | ASSERT(cbuf != nullptr); | 303 | |
| 304 | const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset()); | 304 | bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); |
| 305 | ASSERT(cbuf_offset_imm != nullptr); | 305 | |
| 306 | const auto cbuf_offset = cbuf_offset_imm->GetValue(); | 306 | const GlobalMemoryBase descriptor{index, offset}; |
| 307 | |||
| 308 | bb.push_back( | ||
| 309 | Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); | ||
| 310 | |||
| 311 | const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; | ||
| 312 | const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); | 307 | const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); |
| 313 | auto& usage = entry->second; | 308 | auto& usage = entry->second; |
| 314 | if (is_write) { | 309 | if (is_write) { |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index cb480be9b..e1ee5c190 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -269,7 +269,13 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 269 | LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); | 269 | LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); |
| 270 | } | 270 | } |
| 271 | 271 | ||
| 272 | WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); | 272 | const Node4 components = GetTldsCode(instr, texture_type, is_array); |
| 273 | |||
| 274 | if (instr.tlds.fp32_flag) { | ||
| 275 | WriteTexsInstructionFloat(bb, instr, components); | ||
| 276 | } else { | ||
| 277 | WriteTexsInstructionHalfFloat(bb, instr, components); | ||
| 278 | } | ||
| 273 | break; | 279 | break; |
| 274 | } | 280 | } |
| 275 | default: | 281 | default: |
| @@ -302,13 +308,9 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu | |||
| 302 | const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, | 308 | const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, |
| 303 | bool is_array, bool is_shadow) { | 309 | bool is_array, bool is_shadow) { |
| 304 | const Node sampler_register = GetRegister(reg); | 310 | const Node sampler_register = GetRegister(reg); |
| 305 | const Node base_sampler = | 311 | const auto [base_sampler, cbuf_index, cbuf_offset] = |
| 306 | TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); | 312 | TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); |
| 307 | const auto cbuf = std::get_if<CbufNode>(&*base_sampler); | 313 | ASSERT(base_sampler != nullptr); |
| 308 | const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset()); | ||
| 309 | ASSERT(cbuf_offset_imm != nullptr); | ||
| 310 | const auto cbuf_offset = cbuf_offset_imm->GetValue(); | ||
| 311 | const auto cbuf_index = cbuf->GetIndex(); | ||
| 312 | const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); | 314 | const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); |
| 313 | 315 | ||
| 314 | // If this sampler has already been used, return the existing mapping. | 316 | // If this sampler has already been used, return the existing mapping. |
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 5994bfc4e..78bd1cf1e 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -61,7 +61,16 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { | |||
| 61 | const auto [entry, is_new] = used_cbufs.try_emplace(index); | 61 | const auto [entry, is_new] = used_cbufs.try_emplace(index); |
| 62 | entry->second.MarkAsUsedIndirect(); | 62 | entry->second.MarkAsUsedIndirect(); |
| 63 | 63 | ||
| 64 | const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset)); | 64 | const Node final_offset = [&]() { |
| 65 | // Attempt to inline constant buffer without a variable offset. This is done to allow | ||
| 66 | // tracking LDC calls. | ||
| 67 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | ||
| 68 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 69 | return Immediate(offset); | ||
| 70 | } | ||
| 71 | } | ||
| 72 | return Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset)); | ||
| 73 | }(); | ||
| 65 | return MakeNode<CbufNode>(index, final_offset); | 74 | return MakeNode<CbufNode>(index, final_offset); |
| 66 | } | 75 | } |
| 67 | 76 | ||
| @@ -89,6 +98,22 @@ Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_addres | |||
| 89 | } | 98 | } |
| 90 | 99 | ||
| 91 | Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { | 100 | Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { |
| 101 | if (index == Attribute::Index::LayerViewportPointSize) { | ||
| 102 | switch (element) { | ||
| 103 | case 0: | ||
| 104 | UNIMPLEMENTED(); | ||
| 105 | break; | ||
| 106 | case 1: | ||
| 107 | uses_layer = true; | ||
| 108 | break; | ||
| 109 | case 2: | ||
| 110 | uses_viewport_index = true; | ||
| 111 | break; | ||
| 112 | case 3: | ||
| 113 | uses_point_size = true; | ||
| 114 | break; | ||
| 115 | } | ||
| 116 | } | ||
| 92 | if (index == Attribute::Index::ClipDistances0123 || | 117 | if (index == Attribute::Index::ClipDistances0123 || |
| 93 | index == Attribute::Index::ClipDistances4567) { | 118 | index == Attribute::Index::ClipDistances4567) { |
| 94 | const auto clip_index = | 119 | const auto clip_index = |
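The packed Layer/Viewport/PointSize output attribute is decoded by element: element 1 marks layer usage, 2 the viewport index, 3 the point size, and element 0 stays unimplemented. A self-contained sketch of that mapping (the enum names are made up):

```cpp
enum class PackedOutput { Unhandled, Layer, ViewportIndex, PointSize };

constexpr PackedOutput DecodeLayerViewportPointSize(unsigned element) {
    switch (element) {
    case 1:
        return PackedOutput::Layer;
    case 2:
        return PackedOutput::ViewportIndex;
    case 3:
        return PackedOutput::PointSize;
    default:
        return PackedOutput::Unhandled; // element 0 mirrors the UNIMPLEMENTED() case above
    }
}
```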
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 6145f0a70..126c78136 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -115,6 +115,18 @@ public: | |||
| 115 | return static_cast<std::size_t>(coverage_end * sizeof(u64)); | 115 | return static_cast<std::size_t>(coverage_end * sizeof(u64)); |
| 116 | } | 116 | } |
| 117 | 117 | ||
| 118 | bool UsesLayer() const { | ||
| 119 | return uses_layer; | ||
| 120 | } | ||
| 121 | |||
| 122 | bool UsesViewportIndex() const { | ||
| 123 | return uses_viewport_index; | ||
| 124 | } | ||
| 125 | |||
| 126 | bool UsesPointSize() const { | ||
| 127 | return uses_point_size; | ||
| 128 | } | ||
| 129 | |||
| 118 | bool HasPhysicalAttributes() const { | 130 | bool HasPhysicalAttributes() const { |
| 119 | return uses_physical_attributes; | 131 | return uses_physical_attributes; |
| 120 | } | 132 | } |
| @@ -316,7 +328,7 @@ private: | |||
| 316 | void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, | 328 | void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, |
| 317 | Node op_c, Node imm_lut, bool sets_cc); | 329 | Node op_c, Node imm_lut, bool sets_cc); |
| 318 | 330 | ||
| 319 | Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; | 331 | std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; |
| 320 | 332 | ||
| 321 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; | 333 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; |
| 322 | 334 | ||
| @@ -346,6 +358,9 @@ private: | |||
| 346 | std::set<Image> used_images; | 358 | std::set<Image> used_images; |
| 347 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; | 359 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; |
| 348 | std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; | 360 | std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; |
| 361 | bool uses_layer{}; | ||
| 362 | bool uses_viewport_index{}; | ||
| 363 | bool uses_point_size{}; | ||
| 349 | bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes | 364 | bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes |
| 350 | 365 | ||
| 351 | Tegra::Shader::Header header; | 366 | Tegra::Shader::Header header; |
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index fc957d980..dc132a4a3 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp | |||
| @@ -32,39 +32,44 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, | |||
| 32 | } | 32 | } |
| 33 | return {}; | 33 | return {}; |
| 34 | } | 34 | } |
| 35 | } // namespace | 35 | } // Anonymous namespace |
| 36 | 36 | ||
| 37 | Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { | 37 | std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, |
| 38 | s64 cursor) const { | ||
| 38 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { | 39 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { |
| 39 | // Cbuf found, but it has to be immediate | 40 | // Constant buffer found, test if it's an immediate |
| 40 | return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr; | 41 | const auto offset = cbuf->GetOffset(); |
| 42 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | ||
| 43 | return {tracked, cbuf->GetIndex(), immediate->GetValue()}; | ||
| 44 | } | ||
| 45 | return {}; | ||
| 41 | } | 46 | } |
| 42 | if (const auto gpr = std::get_if<GprNode>(&*tracked)) { | 47 | if (const auto gpr = std::get_if<GprNode>(&*tracked)) { |
| 43 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { | 48 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { |
| 44 | return nullptr; | 49 | return {}; |
| 45 | } | 50 | } |
| 46 | // Reduce the cursor by one to avoid infinite loops when the instruction sets the same | 51 | // Reduce the cursor by one to avoid infinite loops when the instruction sets the same |

| 47 | // register that it uses as operand | 52 | // register that it uses as operand |
| 48 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); | 53 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); |
| 49 | if (!source) { | 54 | if (!source) { |
| 50 | return nullptr; | 55 | return {}; |
| 51 | } | 56 | } |
| 52 | return TrackCbuf(source, code, new_cursor); | 57 | return TrackCbuf(source, code, new_cursor); |
| 53 | } | 58 | } |
| 54 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { | 59 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { |
| 55 | for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { | 60 | for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) { |
| 56 | if (const auto found = TrackCbuf((*operation)[i], code, cursor)) { | 61 | if (auto found = TrackCbuf((*operation)[i], code, cursor); std::get<0>(found)) { |
| 57 | // Cbuf found in operand | 62 | // Cbuf found in operand. |
| 58 | return found; | 63 | return found; |
| 59 | } | 64 | } |
| 60 | } | 65 | } |
| 61 | return nullptr; | 66 | return {}; |
| 62 | } | 67 | } |
| 63 | if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { | 68 | if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { |
| 64 | const auto& conditional_code = conditional->GetCode(); | 69 | const auto& conditional_code = conditional->GetCode(); |
| 65 | return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); | 70 | return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); |
| 66 | } | 71 | } |
| 67 | return nullptr; | 72 | return {}; |
| 68 | } | 73 | } |
| 69 | 74 | ||
| 70 | std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { | 75 | std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { |
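The new signature relies on an "empty tuple means not found" convention: `return {}` value-initializes the tuple, so the node handle is null and callers only have to test the first element. A sketch of that convention, using a `shared_ptr<int>` as a stand-in for the real `Node` (which is compared against `nullptr` above, so it behaves like a smart pointer):

```cpp
#include <cstdint>
#include <memory>
#include <tuple>

using FakeNode = std::shared_ptr<int>;
using TrackResult = std::tuple<FakeNode, std::uint32_t, std::uint32_t>;

TrackResult NotFound() {
    return {}; // null node, zeroed index and offset
}

bool WasFound(const TrackResult& result) {
    return std::get<0>(result) != nullptr;
}
```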
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 7a0fdb19b..6af9044ca 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp | |||
| @@ -75,9 +75,12 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) | |||
| 75 | 75 | ||
| 76 | // Linear Surface check | 76 | // Linear Surface check |
| 77 | if (!params.is_tiled) { | 77 | if (!params.is_tiled) { |
| 78 | if (std::tie(params.width, params.height, params.pitch) == | 78 | if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) { |
| 79 | std::tie(rhs.width, rhs.height, rhs.pitch)) { | 79 | if (params.width == rhs.width) { |
| 80 | return MatchStructureResult::FullMatch; | 80 | return MatchStructureResult::FullMatch; |
| 81 | } else { | ||
| 82 | return MatchStructureResult::SemiMatch; | ||
| 83 | } | ||
| 81 | } | 84 | } |
| 82 | return MatchStructureResult::None; | 85 | return MatchStructureResult::None; |
| 83 | } | 86 | } |
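The linear-surface comparison is relaxed here: height and pitch must still match exactly, but a width mismatch now yields a semi match instead of rejecting the candidate outright. A simplified sketch of that decision (type names are illustrative):

```cpp
enum class Match { None, SemiMatch, FullMatch };

struct LinearParams {
    unsigned width, height, pitch;
};

constexpr Match MatchLinear(const LinearParams& lhs, const LinearParams& rhs) {
    if (lhs.height != rhs.height || lhs.pitch != rhs.pitch) {
        return Match::None;
    }
    return lhs.width == rhs.width ? Match::FullMatch : Match::SemiMatch;
}
```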
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 8ba386a8a..bcce8d863 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h | |||
| @@ -200,8 +200,9 @@ public: | |||
| 200 | modification_tick = tick; | 200 | modification_tick = tick; |
| 201 | } | 201 | } |
| 202 | 202 | ||
| 203 | void MarkAsRenderTarget(const bool is_target) { | 203 | void MarkAsRenderTarget(const bool is_target, const u32 index) { |
| 204 | this->is_target = is_target; | 204 | this->is_target = is_target; |
| 205 | this->index = index; | ||
| 205 | } | 206 | } |
| 206 | 207 | ||
| 207 | void MarkAsPicked(const bool is_picked) { | 208 | void MarkAsPicked(const bool is_picked) { |
| @@ -221,6 +222,10 @@ public: | |||
| 221 | return is_target; | 222 | return is_target; |
| 222 | } | 223 | } |
| 223 | 224 | ||
| 225 | u32 GetRenderTarget() const { | ||
| 226 | return index; | ||
| 227 | } | ||
| 228 | |||
| 224 | bool IsRegistered() const { | 229 | bool IsRegistered() const { |
| 225 | return is_registered; | 230 | return is_registered; |
| 226 | } | 231 | } |
| @@ -307,10 +312,13 @@ private: | |||
| 307 | return view; | 312 | return view; |
| 308 | } | 313 | } |
| 309 | 314 | ||
| 315 | static constexpr u32 NO_RT = 0xFFFFFFFF; | ||
| 316 | |||
| 310 | bool is_modified{}; | 317 | bool is_modified{}; |
| 311 | bool is_target{}; | 318 | bool is_target{}; |
| 312 | bool is_registered{}; | 319 | bool is_registered{}; |
| 313 | bool is_picked{}; | 320 | bool is_picked{}; |
| 321 | u32 index{NO_RT}; | ||
| 314 | u64 modification_tick{}; | 322 | u64 modification_tick{}; |
| 315 | }; | 323 | }; |
| 316 | 324 | ||
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 9c56e2b4f..fd5472451 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp | |||
| @@ -290,12 +290,19 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co | |||
| 290 | 290 | ||
| 291 | std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, | 291 | std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, |
| 292 | bool uncompressed) const { | 292 | bool uncompressed) const { |
| 293 | const bool tiled{as_host_size ? false : is_tiled}; | ||
| 294 | const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; | 293 | const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; |
| 295 | const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; | 294 | const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; |
| 296 | const u32 depth{is_layered ? 1U : GetMipDepth(level)}; | 295 | const u32 depth{is_layered ? 1U : GetMipDepth(level)}; |
| 297 | return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth, | 296 | if (is_tiled) { |
| 298 | GetMipBlockHeight(level), GetMipBlockDepth(level)); | 297 | return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height, |
| 298 | depth, GetMipBlockHeight(level), | ||
| 299 | GetMipBlockDepth(level)); | ||
| 300 | } else if (as_host_size || IsBuffer()) { | ||
| 301 | return GetBytesPerPixel() * width * height * depth; | ||
| 302 | } else { | ||
| 303 | // Linear Texture Case | ||
| 304 | return pitch * height * depth; | ||
| 305 | } | ||
| 299 | } | 306 | } |
| 300 | 307 | ||
| 301 | bool SurfaceParams::operator==(const SurfaceParams& rhs) const { | 308 | bool SurfaceParams::operator==(const SurfaceParams& rhs) const { |
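The mipmap size computation now distinguishes three cases: tiled surfaces keep the block-linear calculation, host-side copies and buffers use a dense bytes-per-pixel times extent size, and linear guest textures are sized from their pitch (row stride in bytes) rather than their width. A simplified sketch of the two non-tiled branches (the block-linear path is omitted and the parameter names are illustrative):

```cpp
#include <cstddef>

std::size_t LinearMipSizeBytes(bool as_host_size, bool is_buffer, std::size_t bpp,
                               std::size_t width, std::size_t height,
                               std::size_t depth, std::size_t pitch) {
    if (as_host_size || is_buffer) {
        return bpp * width * height * depth; // densely packed host/buffer size
    }
    return pitch * height * depth; // linear texture case
}
```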
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c9e72531a..7f9623c62 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -133,11 +133,11 @@ public: | |||
| 133 | regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; | 133 | regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; |
| 134 | auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); | 134 | auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); |
| 135 | if (depth_buffer.target) | 135 | if (depth_buffer.target) |
| 136 | depth_buffer.target->MarkAsRenderTarget(false); | 136 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); |
| 137 | depth_buffer.target = surface_view.first; | 137 | depth_buffer.target = surface_view.first; |
| 138 | depth_buffer.view = surface_view.second; | 138 | depth_buffer.view = surface_view.second; |
| 139 | if (depth_buffer.target) | 139 | if (depth_buffer.target) |
| 140 | depth_buffer.target->MarkAsRenderTarget(true); | 140 | depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT); |
| 141 | return surface_view.second; | 141 | return surface_view.second; |
| 142 | } | 142 | } |
| 143 | 143 | ||
| @@ -167,11 +167,11 @@ public: | |||
| 167 | auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), | 167 | auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), |
| 168 | preserve_contents, true); | 168 | preserve_contents, true); |
| 169 | if (render_targets[index].target) | 169 | if (render_targets[index].target) |
| 170 | render_targets[index].target->MarkAsRenderTarget(false); | 170 | render_targets[index].target->MarkAsRenderTarget(false, NO_RT); |
| 171 | render_targets[index].target = surface_view.first; | 171 | render_targets[index].target = surface_view.first; |
| 172 | render_targets[index].view = surface_view.second; | 172 | render_targets[index].view = surface_view.second; |
| 173 | if (render_targets[index].target) | 173 | if (render_targets[index].target) |
| 174 | render_targets[index].target->MarkAsRenderTarget(true); | 174 | render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index)); |
| 175 | return surface_view.second; | 175 | return surface_view.second; |
| 176 | } | 176 | } |
| 177 | 177 | ||
| @@ -191,7 +191,7 @@ public: | |||
| 191 | if (depth_buffer.target == nullptr) { | 191 | if (depth_buffer.target == nullptr) { |
| 192 | return; | 192 | return; |
| 193 | } | 193 | } |
| 194 | depth_buffer.target->MarkAsRenderTarget(false); | 194 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); |
| 195 | depth_buffer.target = nullptr; | 195 | depth_buffer.target = nullptr; |
| 196 | depth_buffer.view = nullptr; | 196 | depth_buffer.view = nullptr; |
| 197 | } | 197 | } |
| @@ -200,7 +200,7 @@ public: | |||
| 200 | if (render_targets[index].target == nullptr) { | 200 | if (render_targets[index].target == nullptr) { |
| 201 | return; | 201 | return; |
| 202 | } | 202 | } |
| 203 | render_targets[index].target->MarkAsRenderTarget(false); | 203 | render_targets[index].target->MarkAsRenderTarget(false, NO_RT); |
| 204 | render_targets[index].target = nullptr; | 204 | render_targets[index].target = nullptr; |
| 205 | render_targets[index].view = nullptr; | 205 | render_targets[index].view = nullptr; |
| 206 | } | 206 | } |
| @@ -270,6 +270,16 @@ protected: | |||
| 270 | // and reading it from a separate buffer. | 270 | // and reading it from a separate buffer. |
| 271 | virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; | 271 | virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; |
| 272 | 272 | ||
| 273 | void ManageRenderTargetUnregister(TSurface& surface) { | ||
| 274 | auto& maxwell3d = system.GPU().Maxwell3D(); | ||
| 275 | const u32 index = surface->GetRenderTarget(); | ||
| 276 | if (index == DEPTH_RT) { | ||
| 277 | maxwell3d.dirty_flags.zeta_buffer = true; | ||
| 278 | } else { | ||
| 279 | maxwell3d.dirty_flags.color_buffer.set(index, true); | ||
| 280 | } | ||
| 281 | } | ||
| 282 | |||
| 273 | void Register(TSurface surface) { | 283 | void Register(TSurface surface) { |
| 274 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); | 284 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); |
| 275 | const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); | 285 | const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); |
| @@ -294,6 +304,9 @@ protected: | |||
| 294 | if (guard_render_targets && surface->IsProtected()) { | 304 | if (guard_render_targets && surface->IsProtected()) { |
| 295 | return; | 305 | return; |
| 296 | } | 306 | } |
| 307 | if (!guard_render_targets && surface->IsRenderTarget()) { | ||
| 308 | ManageRenderTargetUnregister(surface); | ||
| 309 | } | ||
| 297 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); | 310 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); |
| 298 | const CacheAddr cache_ptr = surface->GetCacheAddr(); | 311 | const CacheAddr cache_ptr = surface->GetCacheAddr(); |
| 299 | const std::size_t size = surface->GetSizeInBytes(); | 312 | const std::size_t size = surface->GetSizeInBytes(); |
| @@ -649,15 +662,6 @@ private: | |||
| 649 | } | 662 | } |
| 650 | return {current_surface, *view}; | 663 | return {current_surface, *view}; |
| 651 | } | 664 | } |
| 652 | // The next case is unsafe, so if we are in accurate GPU, just skip it | ||
| 653 | if (Settings::values.use_accurate_gpu_emulation) { | ||
| 654 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | ||
| 655 | MatchTopologyResult::FullMatch); | ||
| 656 | } | ||
| 657 | // This is the case the texture is a part of the parent. | ||
| 658 | if (current_surface->MatchesSubTexture(params, gpu_addr)) { | ||
| 659 | return RebuildSurface(current_surface, params, is_render); | ||
| 660 | } | ||
| 661 | } else { | 665 | } else { |
| 662 | // If there are many overlaps, odds are they are subtextures of the candidate | 666 | // If there are many overlaps, odds are they are subtextures of the candidate |
| 663 | // surface. We try to construct a new surface based on the candidate parameters, | 667 | // surface. We try to construct a new surface based on the candidate parameters, |
| @@ -793,6 +797,9 @@ private: | |||
| 793 | static constexpr u64 registry_page_size{1 << registry_page_bits}; | 797 | static constexpr u64 registry_page_size{1 << registry_page_bits}; |
| 794 | std::unordered_map<CacheAddr, std::vector<TSurface>> registry; | 798 | std::unordered_map<CacheAddr, std::vector<TSurface>> registry; |
| 795 | 799 | ||
| 800 | static constexpr u32 DEPTH_RT = 8; | ||
| 801 | static constexpr u32 NO_RT = 0xFFFFFFFF; | ||
| 802 | |||
| 796 | // The L1 Cache is used for fast texture lookup before checking the overlaps | 803 | // The L1 Cache is used for fast texture lookup before checking the overlaps |
| 797 | // This avoids calculating size and other stuff. | 804 | // This avoids calculating size and other stuff. |
| 798 | std::unordered_map<CacheAddr, TSurface> l1_cache; | 805 | std::unordered_map<CacheAddr, TSurface> l1_cache; |
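Each surface now remembers which attachment slot it was last bound to (NO_RT when unbound, DEPTH_RT for the depth buffer), so unregistering it can dirty exactly that slot instead of invalidating every render target. A hypothetical standalone sketch of that bookkeeping, mirroring the dirty-flag writes above:

```cpp
#include <bitset>
#include <cstdint>

constexpr std::uint32_t DEPTH_RT = 8;
constexpr std::uint32_t NO_RT = 0xFFFFFFFF;

struct DirtyFlags {
    bool zeta_buffer = false;
    std::bitset<8> color_buffer;
};

void MarkRenderTargetDirty(DirtyFlags& flags, std::uint32_t index) {
    if (index == NO_RT) {
        return; // surface was not bound as a render target
    }
    if (index == DEPTH_RT) {
        flags.zeta_buffer = true;      // depth buffer needs rebinding
    } else {
        flags.color_buffer.set(index, true); // only this color attachment is dirtied
    }
}
```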